core/stdarch/crates/core_arch/src/x86/avx512f.rs

use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    let a = a.as_i32x16();
    let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
    transmute(r)
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
}
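
// A minimal usage sketch (not part of the original file): contrasts the plain,
// writemask, and zeromask forms of the 32-bit absolute-value intrinsics above.
// The input values, mask constant, and function name are illustrative only;
// the function assumes it is only called on an AVX-512F capable CPU.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_abs_epi32_masking() {
    let a = _mm512_set1_epi32(-7);
    let src = _mm512_set1_epi32(100);
    let all = _mm512_abs_epi32(a); // every lane becomes 7
    // Lanes whose mask bit is set take |a|; the others keep `src` (100).
    let merged = _mm512_mask_abs_epi32(src, 0b0000_0000_1111_1111, a);
    // Zeromask form: unselected lanes are zeroed instead of copied.
    let zeroed = _mm512_maskz_abs_epi32(0b0000_0000_1111_1111, a);
    let _ = (all, merged, zeroed);
}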

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    let a = a.as_i64x8();
    let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
    transmute(r)
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    let a = a.as_i64x4();
    let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
    transmute(r)
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm_abs_epi64(a: __m128i) -> __m128i {
    let a = a.as_i64x2();
    let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
    transmute(r)
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
}
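
// A minimal sketch (not part of the original file): unlike the 32-bit case,
// there is no SSE/AVX2 64-bit packed absolute value, so even the 128-bit form
// used below requires AVX-512F plus AVX-512VL. The values, the mask, and the
// function name are illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_abs_epi64_vl() {
    let a = _mm_set1_epi64x(-42);
    let full = _mm_abs_epi64(a); // both lanes become 42
    // Writemask form: lane 0 keeps `src` (5), lane 1 takes |a| (42).
    let src = _mm_set1_epi64x(5);
    let merged = _mm_mask_abs_epi64(src, 0b10, a);
    let _ = (full, merged);
}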

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
    simd_fabs(v2)
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    simd_select_bitmask(k, simd_fabs(v2), src)
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    simd_fabs(v2)
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    simd_select_bitmask(k, simd_fabs(v2), src)
}
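
// A minimal sketch (not part of the original file): floating-point absolute
// value is a sign-bit clear, which is why the assertions above expect the
// bitwise `vpandd`/`vpandq` instructions rather than an arithmetic one. The
// values, mask, and function name are illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_abs_pd_masked() {
    let v = _mm512_set1_pd(-1.5);
    let src = _mm512_set1_pd(9.0);
    let all = _mm512_abs_pd(v); // every lane becomes 1.5
    // Only the low four lanes take |v|; the upper four keep `src` (9.0).
    let merged = _mm512_mask_abs_pd(src, 0b0000_1111, v);
    let _ = (all, merged);
}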

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let mov = a.as_i32x16();
    transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let mov = a.as_i32x16();
    transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let mov = a.as_i32x8();
    transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let mov = a.as_i32x8();
    transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i32x4();
    transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i32x4();
    transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
}
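
// A minimal sketch (not part of the original file): the masked "mov"
// intrinsics are effectively per-lane blends, so they are a convenient way to
// merge two vectors under a mask without any arithmetic. The values, mask,
// and function name are illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mov_epi32_blend() {
    let a = _mm256_set1_epi32(1);
    let b = _mm256_set1_epi32(2);
    // Lanes 0-3 come from `a`, lanes 4-7 keep `b`.
    let blended = _mm256_mask_mov_epi32(b, 0b0000_1111, a);
    // Zeromask form keeps the selected lanes of `a` and zeroes the rest.
    let zeroed = _mm256_maskz_mov_epi32(0b0000_1111, a);
    let _ = (blended, zeroed);
}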

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let mov = a.as_i64x8();
    transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let mov = a.as_i64x8();
    transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let mov = a.as_i64x4();
    transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let mov = a.as_i64x4();
    transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i64x2();
    transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i64x2();
    transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let mov = a.as_f32x16();
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    let mov = a.as_f32x16();
    transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    let mov = a.as_f32x8();
    transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    let mov = a.as_f32x8();
    transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    let mov = a.as_f32x4();
    transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    let mov = a.as_f32x4();
    transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    let mov = a.as_f64x8();
    transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    let mov = a.as_f64x8();
    transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    let mov = a.as_f64x4();
    transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    let mov = a.as_f64x4();
    transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    let mov = a.as_f64x2();
    transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    let mov = a.as_f64x2();
    transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, add, i32x16::ZERO))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, add, src.as_i32x8()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, add, i32x8::ZERO))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, add, src.as_i32x4()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, add, i32x4::ZERO))
}
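
// A minimal sketch (not part of the original file): masked 32-bit addition,
// where unselected lanes either keep `src` (writemask) or become zero
// (zeromask). The values, mask, and function name are illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_add_epi32_masked() {
    let a = _mm512_set1_epi32(10);
    let b = _mm512_set1_epi32(3);
    let src = _mm512_set1_epi32(-1);
    // Even-numbered lanes get 13, odd-numbered lanes keep -1 from `src`.
    let merged = _mm512_mask_add_epi32(src, 0b0101_0101_0101_0101, a, b);
    // Zeromask form: odd-numbered lanes become 0 instead.
    let zeroed = _mm512_maskz_add_epi32(0b0101_0101_0101_0101, a, b);
    let _ = (merged, zeroed);
}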

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, add, i64x8::ZERO))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, add, src.as_i64x4()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, add, i64x4::ZERO))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, add, src.as_i64x2()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, add, i64x2::ZERO))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, add, f32x16::ZERO))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let add = _mm256_add_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, add, src.as_f32x8()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let add = _mm256_add_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, add, f32x8::ZERO))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let add = _mm_add_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, add, src.as_f32x4()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let add = _mm_add_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, add, f32x4::ZERO))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, add, f64x8::ZERO))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let add = _mm256_add_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, add, src.as_f64x4()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let add = _mm256_add_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, add, f64x4::ZERO))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let add = _mm_add_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, add, src.as_f64x2()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let add = _mm_add_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, add, f64x2::ZERO))
}
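
// A minimal sketch (not part of the original file): the same masking pattern
// applied to packed double-precision addition; unselected lanes simply keep
// the corresponding element of `src`. Values, mask, and function name are
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_add_pd_masked() {
    let a = _mm512_set1_pd(1.25);
    let b = _mm512_set1_pd(0.5);
    let src = _mm512_set1_pd(f64::NAN);
    // Low four lanes get 1.75; the upper four keep NaN from `src`.
    let merged = _mm512_mask_add_pd(src, 0b0000_1111, a, b);
    let _ = merged;
}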

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i32x16(), b.as_i32x16()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi32(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi32(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
}
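
// A minimal sketch (not part of the original file): masked subtraction mirrors
// masked addition, and feeding the result of one as the `src` of the other
// gives a per-lane "add or subtract" select. The values, mask, and function
// name are illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_sub_epi32_masked() {
    let a = _mm512_set1_epi32(10);
    let b = _mm512_set1_epi32(3);
    let k: __mmask16 = 0b1111_1111_0000_0000;
    // High eight lanes compute a - b (7); low eight lanes keep a + b (13).
    let sums = _mm512_add_epi32(a, b);
    let mixed = _mm512_mask_sub_epi32(sums, k, a, b);
    let _ = mixed;
}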

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i64x8(), b.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi64(a, b).as_i64x4();
    transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi64(a, b).as_i64x2();
    transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
}
1042
1043/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1044///
1045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1046#[inline]
1047#[target_feature(enable = "avx512f")]
1048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1049#[cfg_attr(test, assert_instr(vsubps))]
1050pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1051    transmute(simd_sub(a.as_f32x16(), b.as_f32x16()))
1052}
1053
1054/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1055///
1056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1057#[inline]
1058#[target_feature(enable = "avx512f")]
1059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1060#[cfg_attr(test, assert_instr(vsubps))]
1061pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1062    let sub = _mm512_sub_ps(a, b).as_f32x16();
1063    transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1064}
1065
1066/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1067///
1068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1069#[inline]
1070#[target_feature(enable = "avx512f")]
1071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1072#[cfg_attr(test, assert_instr(vsubps))]
1073pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1074    let sub = _mm512_sub_ps(a, b).as_f32x16();
1075    transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1076}
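
// NOTE: illustrative, test-only sketch (the name `example_maskz_sub_ps` is hypothetical
// and not in the upstream source): with the zero-masked form, lanes whose mask bit is
// clear become 0.0 rather than being copied from a `src` operand.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_sub_ps() {
    let a = _mm512_set1_ps(5.0);
    let b = _mm512_set1_ps(1.5);
    // The low eight lanes hold 5.0 - 1.5 = 3.5; the upper eight lanes are zeroed.
    let r: [f32; 16] = mem::transmute(_mm512_maskz_sub_ps(0b0000_0000_1111_1111, a, b));
    assert_eq!(&r[..8], &[3.5; 8]);
    assert_eq!(&r[8..], &[0.0; 8]);
}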
1077
1078/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1079///
1080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1081#[inline]
1082#[target_feature(enable = "avx512f,avx512vl")]
1083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1084#[cfg_attr(test, assert_instr(vsubps))]
1085pub unsafe fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1086    let sub = _mm256_sub_ps(a, b).as_f32x8();
1087    transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1088}
1089
1090/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1091///
1092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1093#[inline]
1094#[target_feature(enable = "avx512f,avx512vl")]
1095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1096#[cfg_attr(test, assert_instr(vsubps))]
1097pub unsafe fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1098    let sub = _mm256_sub_ps(a, b).as_f32x8();
1099    transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1100}
1101
1102/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1103///
1104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1105#[inline]
1106#[target_feature(enable = "avx512f,avx512vl")]
1107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1108#[cfg_attr(test, assert_instr(vsubps))]
1109pub unsafe fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1110    let sub = _mm_sub_ps(a, b).as_f32x4();
1111    transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1112}
1113
1114/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1115///
1116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1117#[inline]
1118#[target_feature(enable = "avx512f,avx512vl")]
1119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1120#[cfg_attr(test, assert_instr(vsubps))]
1121pub unsafe fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1122    let sub = _mm_sub_ps(a, b).as_f32x4();
1123    transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1124}
1125
1126/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1127///
1128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1129#[inline]
1130#[target_feature(enable = "avx512f")]
1131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1132#[cfg_attr(test, assert_instr(vsubpd))]
1133pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1134    transmute(simd_sub(a.as_f64x8(), b.as_f64x8()))
1135}
1136
1137/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1138///
1139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1140#[inline]
1141#[target_feature(enable = "avx512f")]
1142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1143#[cfg_attr(test, assert_instr(vsubpd))]
1144pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1145    let sub = _mm512_sub_pd(a, b).as_f64x8();
1146    transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1147}
1148
1149/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1150///
1151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1152#[inline]
1153#[target_feature(enable = "avx512f")]
1154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1155#[cfg_attr(test, assert_instr(vsubpd))]
1156pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1157    let sub = _mm512_sub_pd(a, b).as_f64x8();
1158    transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1159}
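
// NOTE: illustrative, test-only sketch (`example_maskz_sub_pd` is a hypothetical name,
// not part of the upstream source): the double-precision forms behave like the
// single-precision ones, just with eight 64-bit lanes per vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_sub_pd() {
    let a = _mm512_set1_pd(2.5);
    let b = _mm512_set1_pd(1.0);
    // Lanes 0..4 hold 2.5 - 1.0 = 1.5; the upper four lanes are zeroed by the mask.
    let r: [f64; 8] = mem::transmute(_mm512_maskz_sub_pd(0b0000_1111, a, b));
    assert_eq!(r, [1.5, 1.5, 1.5, 1.5, 0.0, 0.0, 0.0, 0.0]);
}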
1160
1161/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1162///
1163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1164#[inline]
1165#[target_feature(enable = "avx512f,avx512vl")]
1166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1167#[cfg_attr(test, assert_instr(vsubpd))]
1168pub unsafe fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1169    let sub = _mm256_sub_pd(a, b).as_f64x4();
1170    transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1171}
1172
1173/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1174///
1175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1176#[inline]
1177#[target_feature(enable = "avx512f,avx512vl")]
1178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1179#[cfg_attr(test, assert_instr(vsubpd))]
1180pub unsafe fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1181    let sub = _mm256_sub_pd(a, b).as_f64x4();
1182    transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1183}
1184
1185/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1186///
1187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1188#[inline]
1189#[target_feature(enable = "avx512f,avx512vl")]
1190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1191#[cfg_attr(test, assert_instr(vsubpd))]
1192pub unsafe fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1193    let sub = _mm_sub_pd(a, b).as_f64x2();
1194    transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1195}
1196
1197/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1198///
1199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1200#[inline]
1201#[target_feature(enable = "avx512f,avx512vl")]
1202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1203#[cfg_attr(test, assert_instr(vsubpd))]
1204pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1205    let sub = _mm_sub_pd(a, b).as_f64x2();
1206    transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1207}
1208
1209/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1210///
1211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1212#[inline]
1213#[target_feature(enable = "avx512f")]
1214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1215#[cfg_attr(test, assert_instr(vpmuldq))]
1216pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1217    let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1218    let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
1219    transmute(simd_mul(a, b))
1220}
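
// NOTE: illustrative, test-only sketch (`example_mul_epi32` is a hypothetical name, not
// part of the upstream source) demonstrating that only the low, sign-extended 32 bits
// of each 64-bit lane take part in the multiplication.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mul_epi32() {
    // Every 64-bit lane of `a` is 0xFFFF_FFFF_0000_0002; only its low dword (2) is used.
    let a = _mm512_set1_epi64(0xFFFF_FFFF_0000_0002u64 as i64);
    // The low dword of each lane of `b` reads as -3 when sign-extended.
    let b = _mm512_set1_epi64(-3);
    let r: [i64; 8] = mem::transmute(_mm512_mul_epi32(a, b));
    assert_eq!(r, [-6; 8]);
}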
1221
1222/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1223///
1224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1225#[inline]
1226#[target_feature(enable = "avx512f")]
1227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1228#[cfg_attr(test, assert_instr(vpmuldq))]
1229pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1230    let mul = _mm512_mul_epi32(a, b).as_i64x8();
1231    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1232}
1233
1234/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1237#[inline]
1238#[target_feature(enable = "avx512f")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vpmuldq))]
1241pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1242    let mul = _mm512_mul_epi32(a, b).as_i64x8();
1243    transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1244}
1245
1246/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1247///
1248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1249#[inline]
1250#[target_feature(enable = "avx512f,avx512vl")]
1251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1252#[cfg_attr(test, assert_instr(vpmuldq))]
1253pub unsafe fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1254    let mul = _mm256_mul_epi32(a, b).as_i64x4();
1255    transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1256}
1257
1258/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1259///
1260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1261#[inline]
1262#[target_feature(enable = "avx512f,avx512vl")]
1263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1264#[cfg_attr(test, assert_instr(vpmuldq))]
1265pub unsafe fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1266    let mul = _mm256_mul_epi32(a, b).as_i64x4();
1267    transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1268}
1269
1270/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1271///
1272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1273#[inline]
1274#[target_feature(enable = "avx512f,avx512vl")]
1275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1276#[cfg_attr(test, assert_instr(vpmuldq))]
1277pub unsafe fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1278    let mul = _mm_mul_epi32(a, b).as_i64x2();
1279    transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1280}
1281
1282/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1283///
1284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1285#[inline]
1286#[target_feature(enable = "avx512f,avx512vl")]
1287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1288#[cfg_attr(test, assert_instr(vpmuldq))]
1289pub unsafe fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1290    let mul = _mm_mul_epi32(a, b).as_i64x2();
1291    transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1292}
1293
1294/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1295///
1296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1297#[inline]
1298#[target_feature(enable = "avx512f")]
1299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1300#[cfg_attr(test, assert_instr(vpmulld))]
1301pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
1302    transmute(simd_mul(a.as_i32x16(), b.as_i32x16()))
1303}
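
// NOTE: illustrative, test-only sketch (`example_mullo_epi32` is a hypothetical name,
// not part of the upstream source) showing the low-32-bit truncation of the
// intermediate 64-bit products.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mullo_epi32() {
    // 0x0001_0000 * 0x0001_0000 = 2^32; its low 32 bits are zero, so every lane wraps to 0.
    let a = _mm512_set1_epi32(0x0001_0000);
    let r: [i32; 16] = mem::transmute(_mm512_mullo_epi32(a, a));
    assert_eq!(r, [0; 16]);
}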
1304
1305/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1306///
1307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1308#[inline]
1309#[target_feature(enable = "avx512f")]
1310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1311#[cfg_attr(test, assert_instr(vpmulld))]
1312pub unsafe fn _mm512_mask_mullo_epi32(
1313    src: __m512i,
1314    k: __mmask16,
1315    a: __m512i,
1316    b: __m512i,
1317) -> __m512i {
1318    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1319    transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1320}
1321
1322/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1323///
1324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1325#[inline]
1326#[target_feature(enable = "avx512f")]
1327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1328#[cfg_attr(test, assert_instr(vpmulld))]
1329pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1330    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
1331    transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1332}
1333
1334/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1335///
1336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1337#[inline]
1338#[target_feature(enable = "avx512f,avx512vl")]
1339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1340#[cfg_attr(test, assert_instr(vpmulld))]
1341pub unsafe fn _mm256_mask_mullo_epi32(
1342    src: __m256i,
1343    k: __mmask8,
1344    a: __m256i,
1345    b: __m256i,
1346) -> __m256i {
1347    let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1348    transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1349}
1350
1351/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1352///
1353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1354#[inline]
1355#[target_feature(enable = "avx512f,avx512vl")]
1356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1357#[cfg_attr(test, assert_instr(vpmulld))]
1358pub unsafe fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1359    let mul = _mm256_mullo_epi32(a, b).as_i32x8();
1360    transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1361}
1362
1363/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1364///
1365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1366#[inline]
1367#[target_feature(enable = "avx512f,avx512vl")]
1368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1369#[cfg_attr(test, assert_instr(vpmulld))]
1370pub unsafe fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1371    let mul = _mm_mullo_epi32(a, b).as_i32x4();
1372    transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1373}
1374
1375/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1376///
1377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1378#[inline]
1379#[target_feature(enable = "avx512f,avx512vl")]
1380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1381#[cfg_attr(test, assert_instr(vpmulld))]
1382pub unsafe fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1383    let mul = _mm_mullo_epi32(a, b).as_i32x4();
1384    transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1385}
1386
1387/// Multiply packed 64-bit integers in a and b, and store the low 64 bits of the results in dst.
1388///
1389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1390///
1391/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1392#[inline]
1393#[target_feature(enable = "avx512f")]
1394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1395pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
1396    transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
1397}
1398
1399/// Multiply packed 64-bit integers in a and b, and store the low 64 bits of the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1400///
1401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1402///
1403/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1404#[inline]
1405#[target_feature(enable = "avx512f")]
1406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1407pub unsafe fn _mm512_mask_mullox_epi64(
1408    src: __m512i,
1409    k: __mmask8,
1410    a: __m512i,
1411    b: __m512i,
1412) -> __m512i {
1413    let mul = _mm512_mullox_epi64(a, b).as_i64x8();
1414    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1415}
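
// NOTE: illustrative, test-only sketch (`example_mullox_epi64` is a hypothetical name,
// not part of the upstream source): unlike `_mm512_mul_epi32`, all 64 bits of each lane
// participate, and only the low 64 bits of each product are kept.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mullox_epi64() {
    let a = _mm512_set1_epi64(1 << 33);
    let b = _mm512_set1_epi64(3);
    let r: [i64; 8] = mem::transmute(_mm512_mullox_epi64(a, b));
    assert_eq!(r, [3 << 33; 8]);
}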
1416
1417/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1418///
1419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1420#[inline]
1421#[target_feature(enable = "avx512f")]
1422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1423#[cfg_attr(test, assert_instr(vpmuludq))]
1424pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1425    let a = a.as_u64x8();
1426    let b = b.as_u64x8();
1427    let mask = u64x8::splat(u32::MAX.into());
1428    transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1429}
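
// NOTE: illustrative, test-only sketch (`example_mul_epu32` is a hypothetical name, not
// part of the upstream source) contrasting the unsigned interpretation of the low dword
// with the signed `_mm512_mul_epi32`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mul_epu32() {
    // The low dword 0xFFFF_FFFF reads as 4_294_967_295 here; `_mm512_mul_epi32` would read -1.
    let a = _mm512_set1_epi64(0xFFFF_FFFFu32 as i64);
    let b = _mm512_set1_epi64(2);
    let r: [u64; 8] = mem::transmute(_mm512_mul_epu32(a, b));
    assert_eq!(r, [0x1_FFFF_FFFE; 8]);
}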
1430
1431/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1432///
1433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1434#[inline]
1435#[target_feature(enable = "avx512f")]
1436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1437#[cfg_attr(test, assert_instr(vpmuludq))]
1438pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1439    let mul = _mm512_mul_epu32(a, b).as_u64x8();
1440    transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1441}
1442
1443/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1444///
1445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1446#[inline]
1447#[target_feature(enable = "avx512f")]
1448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1449#[cfg_attr(test, assert_instr(vpmuludq))]
1450pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1451    let mul = _mm512_mul_epu32(a, b).as_u64x8();
1452    transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1453}
1454
1455/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1456///
1457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1458#[inline]
1459#[target_feature(enable = "avx512f,avx512vl")]
1460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1461#[cfg_attr(test, assert_instr(vpmuludq))]
1462pub unsafe fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1463    let mul = _mm256_mul_epu32(a, b).as_u64x4();
1464    transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1465}
1466
1467/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1468///
1469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1470#[inline]
1471#[target_feature(enable = "avx512f,avx512vl")]
1472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1473#[cfg_attr(test, assert_instr(vpmuludq))]
1474pub unsafe fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1475    let mul = _mm256_mul_epu32(a, b).as_u64x4();
1476    transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1477}
1478
1479/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1480///
1481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1482#[inline]
1483#[target_feature(enable = "avx512f,avx512vl")]
1484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1485#[cfg_attr(test, assert_instr(vpmuludq))]
1486pub unsafe fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1487    let mul = _mm_mul_epu32(a, b).as_u64x2();
1488    transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1489}
1490
1491/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1492///
1493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1494#[inline]
1495#[target_feature(enable = "avx512f,avx512vl")]
1496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1497#[cfg_attr(test, assert_instr(vpmuludq))]
1498pub unsafe fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1499    let mul = _mm_mul_epu32(a, b).as_u64x2();
1500    transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1501}
1502
1503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1504///
1505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1506#[inline]
1507#[target_feature(enable = "avx512f")]
1508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1509#[cfg_attr(test, assert_instr(vmulps))]
1510pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
1511    transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
1512}
1513
1514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1515///
1516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1517#[inline]
1518#[target_feature(enable = "avx512f")]
1519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1520#[cfg_attr(test, assert_instr(vmulps))]
1521pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1522    let mul = _mm512_mul_ps(a, b).as_f32x16();
1523    transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1524}
1525
1526/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1527///
1528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1529#[inline]
1530#[target_feature(enable = "avx512f")]
1531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1532#[cfg_attr(test, assert_instr(vmulps))]
1533pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1534    let mul = _mm512_mul_ps(a, b).as_f32x16();
1535    transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1536}
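
// NOTE: illustrative, test-only sketch (`example_mask_mul_ps` is a hypothetical name,
// not part of the upstream source) showing the write-masked multiply copying unselected
// lanes from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_mul_ps() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(4.0);
    let src = _mm512_set1_ps(-1.0);
    // Even-numbered lanes get 2.0 * 4.0 = 8.0; odd-numbered lanes keep the value from `src`.
    let r: [f32; 16] = mem::transmute(_mm512_mask_mul_ps(src, 0b0101_0101_0101_0101, a, b));
    for (i, &x) in r.iter().enumerate() {
        assert_eq!(x, if i % 2 == 0 { 8.0 } else { -1.0 });
    }
}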
1537
1538/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1539///
1540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1541#[inline]
1542#[target_feature(enable = "avx512f,avx512vl")]
1543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1544#[cfg_attr(test, assert_instr(vmulps))]
1545pub unsafe fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1546    let mul = _mm256_mul_ps(a, b).as_f32x8();
1547    transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1548}
1549
1550/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1551///
1552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1553#[inline]
1554#[target_feature(enable = "avx512f,avx512vl")]
1555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1556#[cfg_attr(test, assert_instr(vmulps))]
1557pub unsafe fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1558    let mul = _mm256_mul_ps(a, b).as_f32x8();
1559    transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1560}
1561
1562/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1563///
1564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1565#[inline]
1566#[target_feature(enable = "avx512f,avx512vl")]
1567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1568#[cfg_attr(test, assert_instr(vmulps))]
1569pub unsafe fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1570    let mul = _mm_mul_ps(a, b).as_f32x4();
1571    transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1572}
1573
1574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1575///
1576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1577#[inline]
1578#[target_feature(enable = "avx512f,avx512vl")]
1579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1580#[cfg_attr(test, assert_instr(vmulps))]
1581pub unsafe fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1582    let mul = _mm_mul_ps(a, b).as_f32x4();
1583    transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1584}
1585
1586/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1587///
1588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1589#[inline]
1590#[target_feature(enable = "avx512f")]
1591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1592#[cfg_attr(test, assert_instr(vmulpd))]
1593pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
1594    transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
1595}
1596
1597/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1598///
1599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1600#[inline]
1601#[target_feature(enable = "avx512f")]
1602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1603#[cfg_attr(test, assert_instr(vmulpd))]
1604pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1605    let mul = _mm512_mul_pd(a, b).as_f64x8();
1606    transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1607}
1608
1609/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1610///
1611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1612#[inline]
1613#[target_feature(enable = "avx512f")]
1614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1615#[cfg_attr(test, assert_instr(vmulpd))]
1616pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1617    let mul = _mm512_mul_pd(a, b).as_f64x8();
1618    transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1619}
1620
1621/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1622///
1623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1624#[inline]
1625#[target_feature(enable = "avx512f,avx512vl")]
1626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1627#[cfg_attr(test, assert_instr(vmulpd))]
1628pub unsafe fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1629    let mul = _mm256_mul_pd(a, b).as_f64x4();
1630    transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1631}
1632
1633/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1634///
1635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1636#[inline]
1637#[target_feature(enable = "avx512f,avx512vl")]
1638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1639#[cfg_attr(test, assert_instr(vmulpd))]
1640pub unsafe fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1641    let mul = _mm256_mul_pd(a, b).as_f64x4();
1642    transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1643}
1644
1645/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1646///
1647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1648#[inline]
1649#[target_feature(enable = "avx512f,avx512vl")]
1650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1651#[cfg_attr(test, assert_instr(vmulpd))]
1652pub unsafe fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1653    let mul = _mm_mul_pd(a, b).as_f64x2();
1654    transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1655}
1656
1657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1658///
1659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1660#[inline]
1661#[target_feature(enable = "avx512f,avx512vl")]
1662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1663#[cfg_attr(test, assert_instr(vmulpd))]
1664pub unsafe fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1665    let mul = _mm_mul_pd(a, b).as_f64x2();
1666    transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1667}
1668
1669/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1670///
1671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
1672#[inline]
1673#[target_feature(enable = "avx512f")]
1674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1675#[cfg_attr(test, assert_instr(vdivps))]
1676pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
1677    transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
1678}
1679
1680/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1681///
1682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1683#[inline]
1684#[target_feature(enable = "avx512f")]
1685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1686#[cfg_attr(test, assert_instr(vdivps))]
1687pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1688    let div = _mm512_div_ps(a, b).as_f32x16();
1689    transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1690}
1691
1692/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1695#[inline]
1696#[target_feature(enable = "avx512f")]
1697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1698#[cfg_attr(test, assert_instr(vdivps))]
1699pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1700    let div = _mm512_div_ps(a, b).as_f32x16();
1701    transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1702}
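
// NOTE: illustrative, test-only sketch (`example_maskz_div_ps` is a hypothetical name,
// not part of the upstream source) of the zero-masked division.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_div_ps() {
    let a = _mm512_set1_ps(9.0);
    let b = _mm512_set1_ps(4.0);
    // The low four lanes hold 9.0 / 4.0 = 2.25; lanes with a clear mask bit are zeroed.
    let r: [f32; 16] = mem::transmute(_mm512_maskz_div_ps(0b0000_0000_0000_1111, a, b));
    assert_eq!(&r[..4], &[2.25; 4]);
    assert_eq!(&r[4..], &[0.0; 12]);
}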
1703
1704/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1705///
1706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1707#[inline]
1708#[target_feature(enable = "avx512f,avx512vl")]
1709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1710#[cfg_attr(test, assert_instr(vdivps))]
1711pub unsafe fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1712    let div = _mm256_div_ps(a, b).as_f32x8();
1713    transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1714}
1715
1716/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1717///
1718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1719#[inline]
1720#[target_feature(enable = "avx512f,avx512vl")]
1721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1722#[cfg_attr(test, assert_instr(vdivps))]
1723pub unsafe fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1724    let div = _mm256_div_ps(a, b).as_f32x8();
1725    transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1726}
1727
1728/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1729///
1730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1731#[inline]
1732#[target_feature(enable = "avx512f,avx512vl")]
1733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1734#[cfg_attr(test, assert_instr(vdivps))]
1735pub unsafe fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1736    let div = _mm_div_ps(a, b).as_f32x4();
1737    transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1738}
1739
1740/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1741///
1742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1743#[inline]
1744#[target_feature(enable = "avx512f,avx512vl")]
1745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1746#[cfg_attr(test, assert_instr(vdivps))]
1747pub unsafe fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1748    let div = _mm_div_ps(a, b).as_f32x4();
1749    transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1750}
1751
1752/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1753///
1754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1755#[inline]
1756#[target_feature(enable = "avx512f")]
1757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1758#[cfg_attr(test, assert_instr(vdivpd))]
1759pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
1760    transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
1761}
1762
1763/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1764///
1765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
1766#[inline]
1767#[target_feature(enable = "avx512f")]
1768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1769#[cfg_attr(test, assert_instr(vdivpd))]
1770pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1771    let div = _mm512_div_pd(a, b).as_f64x8();
1772    transmute(simd_select_bitmask(k, div, src.as_f64x8()))
1773}
1774
1775/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1776///
1777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
1778#[inline]
1779#[target_feature(enable = "avx512f")]
1780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1781#[cfg_attr(test, assert_instr(vdivpd))]
1782pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1783    let div = _mm512_div_pd(a, b).as_f64x8();
1784    transmute(simd_select_bitmask(k, div, f64x8::ZERO))
1785}
1786
1787/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1793#[cfg_attr(test, assert_instr(vdivpd))]
1794pub unsafe fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1795    let div = _mm256_div_pd(a, b).as_f64x4();
1796    transmute(simd_select_bitmask(k, div, src.as_f64x4()))
1797}
1798
1799/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1800///
1801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
1802#[inline]
1803#[target_feature(enable = "avx512f,avx512vl")]
1804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1805#[cfg_attr(test, assert_instr(vdivpd))]
1806pub unsafe fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1807    let div = _mm256_div_pd(a, b).as_f64x4();
1808    transmute(simd_select_bitmask(k, div, f64x4::ZERO))
1809}
1810
1811/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1812///
1813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
1814#[inline]
1815#[target_feature(enable = "avx512f,avx512vl")]
1816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1817#[cfg_attr(test, assert_instr(vdivpd))]
1818pub unsafe fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1819    let div = _mm_div_pd(a, b).as_f64x2();
1820    transmute(simd_select_bitmask(k, div, src.as_f64x2()))
1821}
1822
1823/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1824///
1825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
1826#[inline]
1827#[target_feature(enable = "avx512f,avx512vl")]
1828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1829#[cfg_attr(test, assert_instr(vdivpd))]
1830pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1831    let div = _mm_div_pd(a, b).as_f64x2();
1832    transmute(simd_select_bitmask(k, div, f64x2::ZERO))
1833}
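
// NOTE: illustrative, test-only sketch (`example_mm_mask_div_pd` is a hypothetical name,
// not part of the upstream source): the 128-bit and 256-bit masked forms additionally
// require AVX-512VL, reflected in the target features below.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mm_mask_div_pd() {
    let a = _mm_set1_pd(1.0);
    let b = _mm_set1_pd(8.0);
    let src = _mm_set1_pd(-1.0);
    // Only lane 0 is divided (1.0 / 8.0 = 0.125); lane 1 is copied from `src`.
    let r: [f64; 2] = mem::transmute(_mm_mask_div_pd(src, 0b01, a, b));
    assert_eq!(r, [0.125, -1.0]);
}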
1834
1835/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
1836///
1837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
1838#[inline]
1839#[target_feature(enable = "avx512f")]
1840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1841#[cfg_attr(test, assert_instr(vpmaxsd))]
1842pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
1843    let a = a.as_i32x16();
1844    let b = b.as_i32x16();
1845    transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
1846}
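
// NOTE: illustrative, test-only sketch (`example_max_epi32` is a hypothetical name, not
// part of the upstream source): the comparison is signed, so -1 loses to 0 instead of
// being treated as 0xFFFF_FFFF.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_max_epi32() {
    let a = _mm512_set1_epi32(-1);
    let b = _mm512_set1_epi32(0);
    let r: [i32; 16] = mem::transmute(_mm512_max_epi32(a, b));
    assert_eq!(r, [0; 16]);
}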
1847
1848/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1849///
1850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
1851#[inline]
1852#[target_feature(enable = "avx512f")]
1853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1854#[cfg_attr(test, assert_instr(vpmaxsd))]
1855pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1856    let max = _mm512_max_epi32(a, b).as_i32x16();
1857    transmute(simd_select_bitmask(k, max, src.as_i32x16()))
1858}
1859
1860/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1861///
1862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
1863#[inline]
1864#[target_feature(enable = "avx512f")]
1865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1866#[cfg_attr(test, assert_instr(vpmaxsd))]
1867pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1868    let max = _mm512_max_epi32(a, b).as_i32x16();
1869    transmute(simd_select_bitmask(k, max, i32x16::ZERO))
1870}
1871
1872/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1873///
1874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
1875#[inline]
1876#[target_feature(enable = "avx512f,avx512vl")]
1877#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1878#[cfg_attr(test, assert_instr(vpmaxsd))]
1879pub unsafe fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1880    let max = _mm256_max_epi32(a, b).as_i32x8();
1881    transmute(simd_select_bitmask(k, max, src.as_i32x8()))
1882}
1883
1884/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1885///
1886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
1887#[inline]
1888#[target_feature(enable = "avx512f,avx512vl")]
1889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1890#[cfg_attr(test, assert_instr(vpmaxsd))]
1891pub unsafe fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1892    let max = _mm256_max_epi32(a, b).as_i32x8();
1893    transmute(simd_select_bitmask(k, max, i32x8::ZERO))
1894}
1895
1896/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
1899#[inline]
1900#[target_feature(enable = "avx512f,avx512vl")]
1901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1902#[cfg_attr(test, assert_instr(vpmaxsd))]
1903pub unsafe fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1904    let max = _mm_max_epi32(a, b).as_i32x4();
1905    transmute(simd_select_bitmask(k, max, src.as_i32x4()))
1906}
1907
1908/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1909///
1910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
1911#[inline]
1912#[target_feature(enable = "avx512f,avx512vl")]
1913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1914#[cfg_attr(test, assert_instr(vpmaxsd))]
1915pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1916    let max = _mm_max_epi32(a, b).as_i32x4();
1917    transmute(simd_select_bitmask(k, max, i32x4::ZERO))
1918}
1919
1920/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
1921///
1922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
1923#[inline]
1924#[target_feature(enable = "avx512f")]
1925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1926#[cfg_attr(test, assert_instr(vpmaxsq))]
1927pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
1928    let a = a.as_i64x8();
1929    let b = b.as_i64x8();
1930    transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
1931}
1932
1933/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1934///
1935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
1936#[inline]
1937#[target_feature(enable = "avx512f")]
1938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1939#[cfg_attr(test, assert_instr(vpmaxsq))]
1940pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1941    let max = _mm512_max_epi64(a, b).as_i64x8();
1942    transmute(simd_select_bitmask(k, max, src.as_i64x8()))
1943}
1944
1945/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1946///
1947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
1948#[inline]
1949#[target_feature(enable = "avx512f")]
1950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1951#[cfg_attr(test, assert_instr(vpmaxsq))]
1952pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1953    let max = _mm512_max_epi64(a, b).as_i64x8();
1954    transmute(simd_select_bitmask(k, max, i64x8::ZERO))
1955}
1956
1957/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
1958///
1959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
1960#[inline]
1961#[target_feature(enable = "avx512f,avx512vl")]
1962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1963#[cfg_attr(test, assert_instr(vpmaxsq))]
1964pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
1965    let a = a.as_i64x4();
1966    let b = b.as_i64x4();
1967    transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
1968}
1969
1970/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1971///
1972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
1973#[inline]
1974#[target_feature(enable = "avx512f,avx512vl")]
1975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1976#[cfg_attr(test, assert_instr(vpmaxsq))]
1977pub unsafe fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1978    let max = _mm256_max_epi64(a, b).as_i64x4();
1979    transmute(simd_select_bitmask(k, max, src.as_i64x4()))
1980}
1981
1982/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1983///
1984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
1985#[inline]
1986#[target_feature(enable = "avx512f,avx512vl")]
1987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1988#[cfg_attr(test, assert_instr(vpmaxsq))]
1989pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1990    let max = _mm256_max_epi64(a, b).as_i64x4();
1991    transmute(simd_select_bitmask(k, max, i64x4::ZERO))
1992}
1993
1994/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
1995///
1996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
1997#[inline]
1998#[target_feature(enable = "avx512f,avx512vl")]
1999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2000#[cfg_attr(test, assert_instr(vpmaxsq))]
2001pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2002    let a = a.as_i64x2();
2003    let b = b.as_i64x2();
2004    transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2005}
2006
2007/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2008///
2009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2010#[inline]
2011#[target_feature(enable = "avx512f,avx512vl")]
2012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2013#[cfg_attr(test, assert_instr(vpmaxsq))]
2014pub unsafe fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2015    let max = _mm_max_epi64(a, b).as_i64x2();
2016    transmute(simd_select_bitmask(k, max, src.as_i64x2()))
2017}
2018
2019/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2020///
2021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2022#[inline]
2023#[target_feature(enable = "avx512f,avx512vl")]
2024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2025#[cfg_attr(test, assert_instr(vpmaxsq))]
2026pub unsafe fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2027    let max = _mm_max_epi64(a, b).as_i64x2();
2028    transmute(simd_select_bitmask(k, max, i64x2::ZERO))
2029}
2030
2031/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2032///
2033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
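///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature and the `_mm512_set1_ps` constructor):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// let b = _mm512_set1_ps(-2.0);
/// let r = _mm512_max_ps(a, b); // every lane is 1.5
/// ```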
2034#[inline]
2035#[target_feature(enable = "avx512f")]
2036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2037#[cfg_attr(test, assert_instr(vmaxps))]
2038pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2039    transmute(vmaxps(
2040        a.as_f32x16(),
2041        b.as_f32x16(),
2042        _MM_FROUND_CUR_DIRECTION,
2043    ))
2044}
2045
2046/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2047///
2048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2049#[inline]
2050#[target_feature(enable = "avx512f")]
2051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2052#[cfg_attr(test, assert_instr(vmaxps))]
2053pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2054    let max = _mm512_max_ps(a, b).as_f32x16();
2055    transmute(simd_select_bitmask(k, max, src.as_f32x16()))
2056}
2057
2058/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
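///
/// An illustrative sketch of the zeromask behaviour (not compiled as a
/// doctest; assumes the `avx512f` target feature):
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// let b = _mm512_set1_ps(2.0);
/// // Lanes 0 and 1 are computed (max(4.0, 2.0) = 4.0); lanes 2..=15 are zeroed.
/// let r = _mm512_maskz_max_ps(0b0000_0000_0000_0011, a, b);
/// ```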
2061#[inline]
2062#[target_feature(enable = "avx512f")]
2063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2064#[cfg_attr(test, assert_instr(vmaxps))]
2065pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2066    let max = _mm512_max_ps(a, b).as_f32x16();
2067    transmute(simd_select_bitmask(k, max, f32x16::ZERO))
2068}
2069
2070/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2071///
2072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2073#[inline]
2074#[target_feature(enable = "avx512f,avx512vl")]
2075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2076#[cfg_attr(test, assert_instr(vmaxps))]
2077pub unsafe fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2078    let max = _mm256_max_ps(a, b).as_f32x8();
2079    transmute(simd_select_bitmask(k, max, src.as_f32x8()))
2080}
2081
2082/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2083///
2084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2085#[inline]
2086#[target_feature(enable = "avx512f,avx512vl")]
2087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2088#[cfg_attr(test, assert_instr(vmaxps))]
2089pub unsafe fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2090    let max = _mm256_max_ps(a, b).as_f32x8();
2091    transmute(simd_select_bitmask(k, max, f32x8::ZERO))
2092}
2093
2094/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2095///
2096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2097#[inline]
2098#[target_feature(enable = "avx512f,avx512vl")]
2099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2100#[cfg_attr(test, assert_instr(vmaxps))]
2101pub unsafe fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2102    let max = _mm_max_ps(a, b).as_f32x4();
2103    transmute(simd_select_bitmask(k, max, src.as_f32x4()))
2104}
2105
2106/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2107///
2108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2109#[inline]
2110#[target_feature(enable = "avx512f,avx512vl")]
2111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2112#[cfg_attr(test, assert_instr(vmaxps))]
2113pub unsafe fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2114    let max = _mm_max_ps(a, b).as_f32x4();
2115    transmute(simd_select_bitmask(k, max, f32x4::ZERO))
2116}
2117
2118/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2119///
2120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
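///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature and the `_mm512_set1_pd` constructor):
///
/// ```ignore
/// let a = _mm512_set1_pd(0.5);
/// let b = _mm512_set1_pd(2.5);
/// let r = _mm512_max_pd(a, b); // every lane is 2.5
/// ```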
2121#[inline]
2122#[target_feature(enable = "avx512f")]
2123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2124#[cfg_attr(test, assert_instr(vmaxpd))]
2125pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2126    transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
2127}
2128
2129/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2132#[inline]
2133#[target_feature(enable = "avx512f")]
2134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2135#[cfg_attr(test, assert_instr(vmaxpd))]
2136pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2137    let max = _mm512_max_pd(a, b).as_f64x8();
2138    transmute(simd_select_bitmask(k, max, src.as_f64x8()))
2139}
2140
2141/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2142///
2143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2144#[inline]
2145#[target_feature(enable = "avx512f")]
2146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2147#[cfg_attr(test, assert_instr(vmaxpd))]
2148pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2149    let max = _mm512_max_pd(a, b).as_f64x8();
2150    transmute(simd_select_bitmask(k, max, f64x8::ZERO))
2151}
2152
2153/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2154///
2155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2156#[inline]
2157#[target_feature(enable = "avx512f,avx512vl")]
2158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2159#[cfg_attr(test, assert_instr(vmaxpd))]
2160pub unsafe fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2161    let max = _mm256_max_pd(a, b).as_f64x4();
2162    transmute(simd_select_bitmask(k, max, src.as_f64x4()))
2163}
2164
2165/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2166///
2167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2168#[inline]
2169#[target_feature(enable = "avx512f,avx512vl")]
2170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2171#[cfg_attr(test, assert_instr(vmaxpd))]
2172pub unsafe fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2173    let max = _mm256_max_pd(a, b).as_f64x4();
2174    transmute(simd_select_bitmask(k, max, f64x4::ZERO))
2175}
2176
2177/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2178///
2179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2180#[inline]
2181#[target_feature(enable = "avx512f,avx512vl")]
2182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2183#[cfg_attr(test, assert_instr(vmaxpd))]
2184pub unsafe fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2185    let max = _mm_max_pd(a, b).as_f64x2();
2186    transmute(simd_select_bitmask(k, max, src.as_f64x2()))
2187}
2188
2189/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2190///
2191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2192#[inline]
2193#[target_feature(enable = "avx512f,avx512vl")]
2194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2195#[cfg_attr(test, assert_instr(vmaxpd))]
2196pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2197    let max = _mm_max_pd(a, b).as_f64x2();
2198    transmute(simd_select_bitmask(k, max, f64x2::ZERO))
2199}
2200
2201/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2202///
2203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
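///
/// An illustrative sketch of the unsigned comparison (not compiled as a
/// doctest; assumes the `avx512f` target feature):
///
/// ```ignore
/// // All bits set is u32::MAX when interpreted as unsigned, so it wins here,
/// // whereas the signed `_mm512_max_epi32` would have picked 1 instead.
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(1);
/// let r = _mm512_max_epu32(a, b); // every lane is 0xFFFF_FFFF
/// ```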
2204#[inline]
2205#[target_feature(enable = "avx512f")]
2206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2207#[cfg_attr(test, assert_instr(vpmaxud))]
2208pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2209    let a = a.as_u32x16();
2210    let b = b.as_u32x16();
2211    transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2212}
2213
2214/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2220#[cfg_attr(test, assert_instr(vpmaxud))]
2221pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2222    let max = _mm512_max_epu32(a, b).as_u32x16();
2223    transmute(simd_select_bitmask(k, max, src.as_u32x16()))
2224}
2225
2226/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2227///
2228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2229#[inline]
2230#[target_feature(enable = "avx512f")]
2231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2232#[cfg_attr(test, assert_instr(vpmaxud))]
2233pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2234    let max = _mm512_max_epu32(a, b).as_u32x16();
2235    transmute(simd_select_bitmask(k, max, u32x16::ZERO))
2236}
2237
2238/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2239///
2240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2241#[inline]
2242#[target_feature(enable = "avx512f,avx512vl")]
2243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2244#[cfg_attr(test, assert_instr(vpmaxud))]
2245pub unsafe fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2246    let max = _mm256_max_epu32(a, b).as_u32x8();
2247    transmute(simd_select_bitmask(k, max, src.as_u32x8()))
2248}
2249
2250/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2251///
2252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2253#[inline]
2254#[target_feature(enable = "avx512f,avx512vl")]
2255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2256#[cfg_attr(test, assert_instr(vpmaxud))]
2257pub unsafe fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2258    let max = _mm256_max_epu32(a, b).as_u32x8();
2259    transmute(simd_select_bitmask(k, max, u32x8::ZERO))
2260}
2261
2262/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2263///
2264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2265#[inline]
2266#[target_feature(enable = "avx512f,avx512vl")]
2267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2268#[cfg_attr(test, assert_instr(vpmaxud))]
2269pub unsafe fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2270    let max = _mm_max_epu32(a, b).as_u32x4();
2271    transmute(simd_select_bitmask(k, max, src.as_u32x4()))
2272}
2273
2274/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2275///
2276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2277#[inline]
2278#[target_feature(enable = "avx512f,avx512vl")]
2279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2280#[cfg_attr(test, assert_instr(vpmaxud))]
2281pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2282    let max = _mm_max_epu32(a, b).as_u32x4();
2283    transmute(simd_select_bitmask(k, max, u32x4::ZERO))
2284}
2285
2286/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
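///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // u64::MAX when interpreted as unsigned
/// let b = _mm512_set1_epi64(10);
/// let r = _mm512_max_epu64(a, b); // every lane is u64::MAX
/// ```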
2289#[inline]
2290#[target_feature(enable = "avx512f")]
2291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2292#[cfg_attr(test, assert_instr(vpmaxuq))]
2293pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2294    let a = a.as_u64x8();
2295    let b = b.as_u64x8();
2296    transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
2297}
2298
2299/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2300///
2301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2302#[inline]
2303#[target_feature(enable = "avx512f")]
2304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2305#[cfg_attr(test, assert_instr(vpmaxuq))]
2306pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2307    let max = _mm512_max_epu64(a, b).as_u64x8();
2308    transmute(simd_select_bitmask(k, max, src.as_u64x8()))
2309}
2310
2311/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2312///
2313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2314#[inline]
2315#[target_feature(enable = "avx512f")]
2316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2317#[cfg_attr(test, assert_instr(vpmaxuq))]
2318pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2319    let max = _mm512_max_epu64(a, b).as_u64x8();
2320    transmute(simd_select_bitmask(k, max, u64x8::ZERO))
2321}
2322
2323/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2324///
2325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2326#[inline]
2327#[target_feature(enable = "avx512f,avx512vl")]
2328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2329#[cfg_attr(test, assert_instr(vpmaxuq))]
2330pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2331    let a = a.as_u64x4();
2332    let b = b.as_u64x4();
2333    transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
2334}
2335
2336/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2337///
2338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2339#[inline]
2340#[target_feature(enable = "avx512f,avx512vl")]
2341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2342#[cfg_attr(test, assert_instr(vpmaxuq))]
2343pub unsafe fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2344    let max = _mm256_max_epu64(a, b).as_u64x4();
2345    transmute(simd_select_bitmask(k, max, src.as_u64x4()))
2346}
2347
2348/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2349///
2350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2351#[inline]
2352#[target_feature(enable = "avx512f,avx512vl")]
2353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2354#[cfg_attr(test, assert_instr(vpmaxuq))]
2355pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2356    let max = _mm256_max_epu64(a, b).as_u64x4();
2357    transmute(simd_select_bitmask(k, max, u64x4::ZERO))
2358}
2359
2360/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2361///
2362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2363#[inline]
2364#[target_feature(enable = "avx512f,avx512vl")]
2365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2366#[cfg_attr(test, assert_instr(vpmaxuq))]
2367pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2368    let a = a.as_u64x2();
2369    let b = b.as_u64x2();
2370    transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
2371}
2372
2373/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379#[cfg_attr(test, assert_instr(vpmaxuq))]
2380pub unsafe fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2381    let max = _mm_max_epu64(a, b).as_u64x2();
2382    transmute(simd_select_bitmask(k, max, src.as_u64x2()))
2383}
2384
2385/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2386///
2387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2388#[inline]
2389#[target_feature(enable = "avx512f,avx512vl")]
2390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2391#[cfg_attr(test, assert_instr(vpmaxuq))]
2392pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2393    let max = _mm_max_epu64(a, b).as_u64x2();
2394    transmute(simd_select_bitmask(k, max, u64x2::ZERO))
2395}
2396
2397/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2398///
2399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
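///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature and the `_mm512_set1_epi32` constructor):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-7);
/// let b = _mm512_set1_epi32(3);
/// let r = _mm512_min_epi32(a, b); // every lane is -7
/// ```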
2400#[inline]
2401#[target_feature(enable = "avx512f")]
2402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2403#[cfg_attr(test, assert_instr(vpminsd))]
2404pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2405    let a = a.as_i32x16();
2406    let b = b.as_i32x16();
2407    transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
2408}
2409
2410/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2411///
2412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2413#[inline]
2414#[target_feature(enable = "avx512f")]
2415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2416#[cfg_attr(test, assert_instr(vpminsd))]
2417pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2418    let min = _mm512_min_epi32(a, b).as_i32x16();
2419    transmute(simd_select_bitmask(k, min, src.as_i32x16()))
2420}
2421
2422/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2423///
2424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2425#[inline]
2426#[target_feature(enable = "avx512f")]
2427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2428#[cfg_attr(test, assert_instr(vpminsd))]
2429pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2430    let min = _mm512_min_epi32(a, b).as_i32x16();
2431    transmute(simd_select_bitmask(k, min, i32x16::ZERO))
2432}
2433
2434/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2435///
2436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2437#[inline]
2438#[target_feature(enable = "avx512f,avx512vl")]
2439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2440#[cfg_attr(test, assert_instr(vpminsd))]
2441pub unsafe fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2442    let min = _mm256_min_epi32(a, b).as_i32x8();
2443    transmute(simd_select_bitmask(k, min, src.as_i32x8()))
2444}
2445
2446/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2447///
2448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2449#[inline]
2450#[target_feature(enable = "avx512f,avx512vl")]
2451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2452#[cfg_attr(test, assert_instr(vpminsd))]
2453pub unsafe fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2454    let min = _mm256_min_epi32(a, b).as_i32x8();
2455    transmute(simd_select_bitmask(k, min, i32x8::ZERO))
2456}
2457
2458/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2459///
2460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2461#[inline]
2462#[target_feature(enable = "avx512f,avx512vl")]
2463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2464#[cfg_attr(test, assert_instr(vpminsd))]
2465pub unsafe fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2466    let min = _mm_min_epi32(a, b).as_i32x4();
2467    transmute(simd_select_bitmask(k, min, src.as_i32x4()))
2468}
2469
2470/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2471///
2472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2473#[inline]
2474#[target_feature(enable = "avx512f,avx512vl")]
2475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2476#[cfg_attr(test, assert_instr(vpminsd))]
2477pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2478    let min = _mm_min_epi32(a, b).as_i32x4();
2479    transmute(simd_select_bitmask(k, min, i32x4::ZERO))
2480}
2481
2482/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
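///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-100);
/// let b = _mm512_set1_epi64(4);
/// let r = _mm512_min_epi64(a, b); // every 64-bit lane is -100
/// ```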
2485#[inline]
2486#[target_feature(enable = "avx512f")]
2487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2488#[cfg_attr(test, assert_instr(vpminsq))]
2489pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2490    let a = a.as_i64x8();
2491    let b = b.as_i64x8();
2492    transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
2493}
2494
2495/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2496///
2497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2498#[inline]
2499#[target_feature(enable = "avx512f")]
2500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2501#[cfg_attr(test, assert_instr(vpminsq))]
2502pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2503    let min = _mm512_min_epi64(a, b).as_i64x8();
2504    transmute(simd_select_bitmask(k, min, src.as_i64x8()))
2505}
2506
2507/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2508///
2509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2510#[inline]
2511#[target_feature(enable = "avx512f")]
2512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2513#[cfg_attr(test, assert_instr(vpminsq))]
2514pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2515    let min = _mm512_min_epi64(a, b).as_i64x8();
2516    transmute(simd_select_bitmask(k, min, i64x8::ZERO))
2517}
2518
2519/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2520///
2521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2522#[inline]
2523#[target_feature(enable = "avx512f,avx512vl")]
2524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2525#[cfg_attr(test, assert_instr(vpminsq))]
2526pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2527    let a = a.as_i64x4();
2528    let b = b.as_i64x4();
2529    transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
2530}
2531
2532/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2533///
2534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2535#[inline]
2536#[target_feature(enable = "avx512f,avx512vl")]
2537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2538#[cfg_attr(test, assert_instr(vpminsq))]
2539pub unsafe fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2540    let min = _mm256_min_epi64(a, b).as_i64x4();
2541    transmute(simd_select_bitmask(k, min, src.as_i64x4()))
2542}
2543
2544/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2545///
2546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2547#[inline]
2548#[target_feature(enable = "avx512f,avx512vl")]
2549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2550#[cfg_attr(test, assert_instr(vpminsq))]
2551pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2552    let min = _mm256_min_epi64(a, b).as_i64x4();
2553    transmute(simd_select_bitmask(k, min, i64x4::ZERO))
2554}
2555
2556/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2557///
2558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2559#[inline]
2560#[target_feature(enable = "avx512f,avx512vl")]
2561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2562#[cfg_attr(test, assert_instr(vpminsq))]
2563pub unsafe fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2564    let a = a.as_i64x2();
2565    let b = b.as_i64x2();
2566    transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
2567}
2568
2569/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2570///
2571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2572#[inline]
2573#[target_feature(enable = "avx512f,avx512vl")]
2574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2575#[cfg_attr(test, assert_instr(vpminsq))]
2576pub unsafe fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2577    let min = _mm_min_epi64(a, b).as_i64x2();
2578    transmute(simd_select_bitmask(k, min, src.as_i64x2()))
2579}
2580
2581/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2587#[cfg_attr(test, assert_instr(vpminsq))]
2588pub unsafe fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589    let min = _mm_min_epi64(a, b).as_i64x2();
2590    transmute(simd_select_bitmask(k, min, i64x2::ZERO))
2591}
2592
2593/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2594///
2595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
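///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// let b = _mm512_set1_ps(-2.0);
/// let r = _mm512_min_ps(a, b); // every lane is -2.0
/// ```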
2596#[inline]
2597#[target_feature(enable = "avx512f")]
2598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2599#[cfg_attr(test, assert_instr(vminps))]
2600pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2601    transmute(vminps(
2602        a.as_f32x16(),
2603        b.as_f32x16(),
2604        _MM_FROUND_CUR_DIRECTION,
2605    ))
2606}
2607
2608/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2609///
2610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2611#[inline]
2612#[target_feature(enable = "avx512f")]
2613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2614#[cfg_attr(test, assert_instr(vminps))]
2615pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2616    let min = _mm512_min_ps(a, b).as_f32x16();
2617    transmute(simd_select_bitmask(k, min, src.as_f32x16()))
2618}
2619
2620/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2621///
2622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
2623#[inline]
2624#[target_feature(enable = "avx512f")]
2625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2626#[cfg_attr(test, assert_instr(vminps))]
2627pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2628    let min = _mm512_min_ps(a, b).as_f32x16();
2629    transmute(simd_select_bitmask(k, min, f32x16::ZERO))
2630}
2631
2632/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2633///
2634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
2635#[inline]
2636#[target_feature(enable = "avx512f,avx512vl")]
2637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2638#[cfg_attr(test, assert_instr(vminps))]
2639pub unsafe fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2640    let min = _mm256_min_ps(a, b).as_f32x8();
2641    transmute(simd_select_bitmask(k, min, src.as_f32x8()))
2642}
2643
2644/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2645///
2646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
2647#[inline]
2648#[target_feature(enable = "avx512f,avx512vl")]
2649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2650#[cfg_attr(test, assert_instr(vminps))]
2651pub unsafe fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2652    let min = _mm256_min_ps(a, b).as_f32x8();
2653    transmute(simd_select_bitmask(k, min, f32x8::ZERO))
2654}
2655
2656/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2657///
2658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
2659#[inline]
2660#[target_feature(enable = "avx512f,avx512vl")]
2661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2662#[cfg_attr(test, assert_instr(vminps))]
2663pub unsafe fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2664    let min = _mm_min_ps(a, b).as_f32x4();
2665    transmute(simd_select_bitmask(k, min, src.as_f32x4()))
2666}
2667
2668/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2669///
2670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
2671#[inline]
2672#[target_feature(enable = "avx512f,avx512vl")]
2673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2674#[cfg_attr(test, assert_instr(vminps))]
2675pub unsafe fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2676    let min = _mm_min_ps(a, b).as_f32x4();
2677    transmute(simd_select_bitmask(k, min, f32x4::ZERO))
2678}
2679
2680/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
2681///
2682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
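///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature):
///
/// ```ignore
/// let a = _mm512_set1_pd(0.5);
/// let b = _mm512_set1_pd(-0.25);
/// let r = _mm512_min_pd(a, b); // every lane is -0.25
/// ```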
2683#[inline]
2684#[target_feature(enable = "avx512f")]
2685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2686#[cfg_attr(test, assert_instr(vminpd))]
2687pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
2688    transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
2689}
2690
2691/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2692///
2693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
2694#[inline]
2695#[target_feature(enable = "avx512f")]
2696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2697#[cfg_attr(test, assert_instr(vminpd))]
2698pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2699    let min = _mm512_min_pd(a, b).as_f64x8();
2700    transmute(simd_select_bitmask(k, min, src.as_f64x8()))
2701}
2702
2703/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2704///
2705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
2706#[inline]
2707#[target_feature(enable = "avx512f")]
2708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2709#[cfg_attr(test, assert_instr(vminpd))]
2710pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2711    let min = _mm512_min_pd(a, b).as_f64x8();
2712    transmute(simd_select_bitmask(k, min, f64x8::ZERO))
2713}
2714
2715/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2716///
2717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
2718#[inline]
2719#[target_feature(enable = "avx512f,avx512vl")]
2720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2721#[cfg_attr(test, assert_instr(vminpd))]
2722pub unsafe fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2723    let min = _mm256_min_pd(a, b).as_f64x4();
2724    transmute(simd_select_bitmask(k, min, src.as_f64x4()))
2725}
2726
2727/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2728///
2729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
2730#[inline]
2731#[target_feature(enable = "avx512f,avx512vl")]
2732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2733#[cfg_attr(test, assert_instr(vminpd))]
2734pub unsafe fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2735    let min = _mm256_min_pd(a, b).as_f64x4();
2736    transmute(simd_select_bitmask(k, min, f64x4::ZERO))
2737}
2738
2739/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2740///
2741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
2742#[inline]
2743#[target_feature(enable = "avx512f,avx512vl")]
2744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2745#[cfg_attr(test, assert_instr(vminpd))]
2746pub unsafe fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2747    let min = _mm_min_pd(a, b).as_f64x2();
2748    transmute(simd_select_bitmask(k, min, src.as_f64x2()))
2749}
2750
2751/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2752///
2753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
2754#[inline]
2755#[target_feature(enable = "avx512f,avx512vl")]
2756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2757#[cfg_attr(test, assert_instr(vminpd))]
2758pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2759    let min = _mm_min_pd(a, b).as_f64x2();
2760    transmute(simd_select_bitmask(k, min, f64x2::ZERO))
2761}
2762
2763/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
2764///
2765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
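///
/// An illustrative sketch of the unsigned comparison (not compiled as a
/// doctest; assumes the `avx512f` target feature):
///
/// ```ignore
/// // All bits set is u32::MAX when interpreted as unsigned, so 7 is the
/// // minimum here, whereas the signed `_mm512_min_epi32` would pick -1.
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(7);
/// let r = _mm512_min_epu32(a, b); // every lane is 7
/// ```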
2766#[inline]
2767#[target_feature(enable = "avx512f")]
2768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2769#[cfg_attr(test, assert_instr(vpminud))]
2770pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
2771    let a = a.as_u32x16();
2772    let b = b.as_u32x16();
2773    transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
2774}
2775
2776/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2777///
2778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
2779#[inline]
2780#[target_feature(enable = "avx512f")]
2781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2782#[cfg_attr(test, assert_instr(vpminud))]
2783pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2784    let min = _mm512_min_epu32(a, b).as_u32x16();
2785    transmute(simd_select_bitmask(k, min, src.as_u32x16()))
2786}
2787
2788/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2789///
2790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
2791#[inline]
2792#[target_feature(enable = "avx512f")]
2793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2794#[cfg_attr(test, assert_instr(vpminud))]
2795pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2796    let min = _mm512_min_epu32(a, b).as_u32x16();
2797    transmute(simd_select_bitmask(k, min, u32x16::ZERO))
2798}
2799
2800/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2801///
2802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
2803#[inline]
2804#[target_feature(enable = "avx512f,avx512vl")]
2805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2806#[cfg_attr(test, assert_instr(vpminud))]
2807pub unsafe fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2808    let min = _mm256_min_epu32(a, b).as_u32x8();
2809    transmute(simd_select_bitmask(k, min, src.as_u32x8()))
2810}
2811
2812/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2813///
2814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
2815#[inline]
2816#[target_feature(enable = "avx512f,avx512vl")]
2817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2818#[cfg_attr(test, assert_instr(vpminud))]
2819pub unsafe fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2820    let min = _mm256_min_epu32(a, b).as_u32x8();
2821    transmute(simd_select_bitmask(k, min, u32x8::ZERO))
2822}
2823
2824/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2825///
2826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
2827#[inline]
2828#[target_feature(enable = "avx512f,avx512vl")]
2829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2830#[cfg_attr(test, assert_instr(vpminud))]
2831pub unsafe fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2832    let min = _mm_min_epu32(a, b).as_u32x4();
2833    transmute(simd_select_bitmask(k, min, src.as_u32x4()))
2834}
2835
2836/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2837///
2838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
2839#[inline]
2840#[target_feature(enable = "avx512f,avx512vl")]
2841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2842#[cfg_attr(test, assert_instr(vpminud))]
2843pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2844    let min = _mm_min_epu32(a, b).as_u32x4();
2845    transmute(simd_select_bitmask(k, min, u32x4::ZERO))
2846}
2847
2848/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
2849///
2850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
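///
/// A minimal illustrative sketch (not compiled as a doctest; assumes the
/// `avx512f` target feature):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // u64::MAX when interpreted as unsigned
/// let b = _mm512_set1_epi64(10);
/// let r = _mm512_min_epu64(a, b); // every lane is 10
/// ```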
2851#[inline]
2852#[target_feature(enable = "avx512f")]
2853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2854#[cfg_attr(test, assert_instr(vpminuq))]
2855pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
2856    let a = a.as_u64x8();
2857    let b = b.as_u64x8();
2858    transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
2859}
2860
2861/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2862///
2863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
2864#[inline]
2865#[target_feature(enable = "avx512f")]
2866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2867#[cfg_attr(test, assert_instr(vpminuq))]
2868pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2869    let min = _mm512_min_epu64(a, b).as_u64x8();
2870    transmute(simd_select_bitmask(k, min, src.as_u64x8()))
2871}
2872
2873/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2874///
2875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
2876#[inline]
2877#[target_feature(enable = "avx512f")]
2878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2879#[cfg_attr(test, assert_instr(vpminuq))]
2880pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2881    let min = _mm512_min_epu64(a, b).as_u64x8();
2882    transmute(simd_select_bitmask(k, min, u64x8::ZERO))
2883}
2884
2885/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
2886///
2887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
2888#[inline]
2889#[target_feature(enable = "avx512f,avx512vl")]
2890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2891#[cfg_attr(test, assert_instr(vpminuq))]
2892pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
2893    let a = a.as_u64x4();
2894    let b = b.as_u64x4();
2895    transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
2896}
2897
2898/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2899///
2900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
2901#[inline]
2902#[target_feature(enable = "avx512f,avx512vl")]
2903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2904#[cfg_attr(test, assert_instr(vpminuq))]
2905pub unsafe fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2906    let min = _mm256_min_epu64(a, b).as_u64x4();
2907    transmute(simd_select_bitmask(k, min, src.as_u64x4()))
2908}
2909
2910/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2911///
2912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
2913#[inline]
2914#[target_feature(enable = "avx512f,avx512vl")]
2915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2916#[cfg_attr(test, assert_instr(vpminuq))]
2917pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2918    let min = _mm256_min_epu64(a, b).as_u64x4();
2919    transmute(simd_select_bitmask(k, min, u64x4::ZERO))
2920}
2921
2922/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
2923///
2924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
2925#[inline]
2926#[target_feature(enable = "avx512f,avx512vl")]
2927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2928#[cfg_attr(test, assert_instr(vpminuq))]
2929pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
2930    let a = a.as_u64x2();
2931    let b = b.as_u64x2();
2932    transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
2933}
2934
2935/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2936///
2937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
2938#[inline]
2939#[target_feature(enable = "avx512f,avx512vl")]
2940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2941#[cfg_attr(test, assert_instr(vpminuq))]
2942pub unsafe fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2943    let min = _mm_min_epu64(a, b).as_u64x2();
2944    transmute(simd_select_bitmask(k, min, src.as_u64x2()))
2945}
2946
2947/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2948///
2949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
2950#[inline]
2951#[target_feature(enable = "avx512f,avx512vl")]
2952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2953#[cfg_attr(test, assert_instr(vpminuq))]
2954pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2955    let min = _mm_min_epu64(a, b).as_u64x2();
2956    transmute(simd_select_bitmask(k, min, u64x2::ZERO))
2957}
2958
2959/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
2960///
2961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
2962#[inline]
2963#[target_feature(enable = "avx512f")]
2964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2965#[cfg_attr(test, assert_instr(vsqrtps))]
2966pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
2967    simd_fsqrt(a)
2968}
2969
2970/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2971///
2972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
2973#[inline]
2974#[target_feature(enable = "avx512f")]
2975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2976#[cfg_attr(test, assert_instr(vsqrtps))]
2977pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
2978    simd_select_bitmask(k, simd_fsqrt(a), src)
2979}
2980
2981/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2982///
2983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
2984#[inline]
2985#[target_feature(enable = "avx512f")]
2986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2987#[cfg_attr(test, assert_instr(vsqrtps))]
2988pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
2989    simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps())
2990}
2991
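// Illustrative usage sketch (not part of the original source, compiled only under
// `cfg(test)`): the square root is computed for every lane, and the writemask
// decides which lanes reach the destination. The helper name, mask, and values
// below are assumptions chosen for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_sqrt_ps() {
    let a = _mm512_set1_ps(4.0);
    let src = _mm512_set1_ps(-1.0);
    // Only the low eight mask bits are set, so lanes 8..=15 are copied from `src`
    // instead of receiving sqrt(4.0) = 2.0.
    let r: [f32; 16] = transmute(_mm512_mask_sqrt_ps(src, 0x00ff, a));
    assert_eq!(&r[..8], &[2.0; 8]);
    assert_eq!(&r[8..], &[-1.0; 8]);
}
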
2992/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2993///
2994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
2995#[inline]
2996#[target_feature(enable = "avx512f,avx512vl")]
2997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2998#[cfg_attr(test, assert_instr(vsqrtps))]
2999pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3000    simd_select_bitmask(k, simd_fsqrt(a), src)
3001}
3002
3003/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3004///
3005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3006#[inline]
3007#[target_feature(enable = "avx512f,avx512vl")]
3008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3009#[cfg_attr(test, assert_instr(vsqrtps))]
3010pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3011    simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps())
3012}
3013
3014/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3015///
3016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3017#[inline]
3018#[target_feature(enable = "avx512f,avx512vl")]
3019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3020#[cfg_attr(test, assert_instr(vsqrtps))]
3021pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3022    simd_select_bitmask(k, simd_fsqrt(a), src)
3023}
3024
3025/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3031#[cfg_attr(test, assert_instr(vsqrtps))]
3032pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3033    simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps())
3034}
3035
3036/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3037///
3038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3039#[inline]
3040#[target_feature(enable = "avx512f")]
3041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3042#[cfg_attr(test, assert_instr(vsqrtpd))]
3043pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3044    simd_fsqrt(a)
3045}
3046
3047/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3048///
3049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3050#[inline]
3051#[target_feature(enable = "avx512f")]
3052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3053#[cfg_attr(test, assert_instr(vsqrtpd))]
3054pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3055    simd_select_bitmask(k, simd_fsqrt(a), src)
3056}
3057
3058/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3059///
3060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3061#[inline]
3062#[target_feature(enable = "avx512f")]
3063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3064#[cfg_attr(test, assert_instr(vsqrtpd))]
3065pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3066    simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd())
3067}
3068
3069/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3070///
3071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3072#[inline]
3073#[target_feature(enable = "avx512f,avx512vl")]
3074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3075#[cfg_attr(test, assert_instr(vsqrtpd))]
3076pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3077    simd_select_bitmask(k, simd_fsqrt(a), src)
3078}
3079
3080/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3081///
3082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3083#[inline]
3084#[target_feature(enable = "avx512f,avx512vl")]
3085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3086#[cfg_attr(test, assert_instr(vsqrtpd))]
3087pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3088    simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd())
3089}
3090
3091/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3092///
3093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3094#[inline]
3095#[target_feature(enable = "avx512f,avx512vl")]
3096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3097#[cfg_attr(test, assert_instr(vsqrtpd))]
3098pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3099    simd_select_bitmask(k, simd_fsqrt(a), src)
3100}
3101
3102/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3103///
3104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3105#[inline]
3106#[target_feature(enable = "avx512f,avx512vl")]
3107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3108#[cfg_attr(test, assert_instr(vsqrtpd))]
3109pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3110    simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd())
3111}
3112
3113/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3114///
3115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
3116#[inline]
3117#[target_feature(enable = "avx512f")]
3118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3119#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3120pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3121    simd_fma(a, b, c)
3122}
3123
3124/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3125///
3126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3127#[inline]
3128#[target_feature(enable = "avx512f")]
3129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3130#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3131pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3132    simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a)
3133}
3134
3135/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3136///
3137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3138#[inline]
3139#[target_feature(enable = "avx512f")]
3140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3141#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3142pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3143    simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps())
3144}
3145
3146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3147///
3148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
3149#[inline]
3150#[target_feature(enable = "avx512f")]
3151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3152#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3153pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3154    simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c)
3155}
3156
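// Illustrative usage sketch (not part of the original source, compiled only under
// `cfg(test)`): the three masked fused multiply-add forms differ only in what an
// unselected lane falls back to (`a` for mask, zero for maskz, `c` for mask3). The
// helper name, mask, and operand values are assumptions chosen for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmadd_ps_mask_variants() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let k: __mmask16 = 0x0001; // only lane 0 is selected
    let mask: [f32; 16] = transmute(_mm512_mask_fmadd_ps(a, k, b, c));
    let maskz: [f32; 16] = transmute(_mm512_maskz_fmadd_ps(k, a, b, c));
    let mask3: [f32; 16] = transmute(_mm512_mask3_fmadd_ps(a, b, c, k));
    // Selected lane 0 always holds 2.0 * 3.0 + 1.0 = 7.0; lane 1 shows the fallback.
    assert_eq!((mask[0], mask[1]), (7.0, 2.0)); // unselected lanes copy `a`
    assert_eq!((maskz[0], maskz[1]), (7.0, 0.0)); // unselected lanes are zeroed
    assert_eq!((mask3[0], mask3[1]), (7.0, 1.0)); // unselected lanes copy `c`
}
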
3157/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3158///
3159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3160#[inline]
3161#[target_feature(enable = "avx512f,avx512vl")]
3162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3163#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3164pub unsafe fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3165    simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a)
3166}
3167
3168/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3169///
3170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3171#[inline]
3172#[target_feature(enable = "avx512f,avx512vl")]
3173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3174#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3175pub unsafe fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3176    simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps())
3177}
3178
3179/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3180///
3181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3182#[inline]
3183#[target_feature(enable = "avx512f,avx512vl")]
3184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3185#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3186pub unsafe fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3187    simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c)
3188}
3189
3190/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3191///
3192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3193#[inline]
3194#[target_feature(enable = "avx512f,avx512vl")]
3195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3196#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3197pub unsafe fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3198    simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a)
3199}
3200
3201/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3202///
3203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3204#[inline]
3205#[target_feature(enable = "avx512f,avx512vl")]
3206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3207#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3208pub unsafe fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3209    simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps())
3210}
3211
3212/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3213///
3214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3215#[inline]
3216#[target_feature(enable = "avx512f,avx512vl")]
3217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3218#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3219pub unsafe fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3220    simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c)
3221}
3222
3223/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3224///
3225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3226#[inline]
3227#[target_feature(enable = "avx512f")]
3228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3229#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3230pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3231    simd_fma(a, b, c)
3232}
3233
3234/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3235///
3236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3237#[inline]
3238#[target_feature(enable = "avx512f")]
3239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3240#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3241pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3242    simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a)
3243}
3244
3245/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3246///
3247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3248#[inline]
3249#[target_feature(enable = "avx512f")]
3250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3251#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3252pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3253    simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd())
3254}
3255
3256/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3257///
3258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3259#[inline]
3260#[target_feature(enable = "avx512f")]
3261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3262#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3263pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3264    simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c)
3265}
3266
3267/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3268///
3269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3270#[inline]
3271#[target_feature(enable = "avx512f,avx512vl")]
3272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3273#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3274pub unsafe fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3275    simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a)
3276}
3277
3278/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3279///
3280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3281#[inline]
3282#[target_feature(enable = "avx512f,avx512vl")]
3283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3284#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3285pub unsafe fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3286    simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd())
3287}
3288
3289/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3290///
3291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3292#[inline]
3293#[target_feature(enable = "avx512f,avx512vl")]
3294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3295#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3296pub unsafe fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3297    simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c)
3298}
3299
3300/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3301///
3302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3303#[inline]
3304#[target_feature(enable = "avx512f,avx512vl")]
3305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3306#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3307pub unsafe fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3308    simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a)
3309}
3310
3311/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3312///
3313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3314#[inline]
3315#[target_feature(enable = "avx512f,avx512vl")]
3316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3317#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3318pub unsafe fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3319    simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd())
3320}
3321
3322/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3323///
3324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3325#[inline]
3326#[target_feature(enable = "avx512f,avx512vl")]
3327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3328#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3329pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3330    simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c)
3331}
3332
3333/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3336#[inline]
3337#[target_feature(enable = "avx512f")]
3338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3339#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3340pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3341    simd_fma(a, b, simd_neg(c))
3342}
3343
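// Illustrative usage sketch (not part of the original source, compiled only under
// `cfg(test)`): fmsub computes `a * b - c` in a single fused step. The helper name
// and operand values are assumptions chosen for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmsub_ps() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r: [f32; 16] = transmute(_mm512_fmsub_ps(a, b, c));
    assert_eq!(r, [5.0; 16]); // 2.0 * 3.0 - 1.0 in every lane
}
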
3344/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3345///
3346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3347#[inline]
3348#[target_feature(enable = "avx512f")]
3349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3350#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3351pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3352    simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a)
3353}
3354
3355/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3356///
3357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3358#[inline]
3359#[target_feature(enable = "avx512f")]
3360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3361#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3362pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3363    simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps())
3364}
3365
3366/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3367///
3368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3369#[inline]
3370#[target_feature(enable = "avx512f")]
3371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3372#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3373pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3374    simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c)
3375}
3376
3377/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3378///
3379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3380#[inline]
3381#[target_feature(enable = "avx512f,avx512vl")]
3382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3383#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3384pub unsafe fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3385    simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a)
3386}
3387
3388/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3389///
3390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3391#[inline]
3392#[target_feature(enable = "avx512f,avx512vl")]
3393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3394#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3395pub unsafe fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3396    simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps())
3397}
3398
3399/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3400///
3401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3402#[inline]
3403#[target_feature(enable = "avx512f,avx512vl")]
3404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3405#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3406pub unsafe fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3407    simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c)
3408}
3409
3410/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3411///
3412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3413#[inline]
3414#[target_feature(enable = "avx512f,avx512vl")]
3415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3416#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3417pub unsafe fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3418    simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a)
3419}
3420
3421/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3422///
3423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3424#[inline]
3425#[target_feature(enable = "avx512f,avx512vl")]
3426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3427#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3428pub unsafe fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3429    simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps())
3430}
3431
3432/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3433///
3434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3435#[inline]
3436#[target_feature(enable = "avx512f,avx512vl")]
3437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3438#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3439pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3440    simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c)
3441}
3442
3443/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3444///
3445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3446#[inline]
3447#[target_feature(enable = "avx512f")]
3448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3449#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3450pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3451    simd_fma(a, b, simd_neg(c))
3452}
3453
3454/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3455///
3456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3457#[inline]
3458#[target_feature(enable = "avx512f")]
3459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3460#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3461pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3462    simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a)
3463}
3464
3465/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3466///
3467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3468#[inline]
3469#[target_feature(enable = "avx512f")]
3470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3471#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3472pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3473    simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd())
3474}
3475
3476/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3477///
3478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3479#[inline]
3480#[target_feature(enable = "avx512f")]
3481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3482#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3483pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3484    simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c)
3485}
3486
3487/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3488///
3489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3490#[inline]
3491#[target_feature(enable = "avx512f,avx512vl")]
3492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3493#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3494pub unsafe fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3495    simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a)
3496}
3497
3498/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3499///
3500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3501#[inline]
3502#[target_feature(enable = "avx512f,avx512vl")]
3503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3504#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3505pub unsafe fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3506    simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd())
3507}
3508
3509/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3510///
3511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3512#[inline]
3513#[target_feature(enable = "avx512f,avx512vl")]
3514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3515#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3516pub unsafe fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3517    simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c)
3518}
3519
3520/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3521///
3522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3523#[inline]
3524#[target_feature(enable = "avx512f,avx512vl")]
3525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3526#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3527pub unsafe fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3528    simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a)
3529}
3530
3531/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3532///
3533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3534#[inline]
3535#[target_feature(enable = "avx512f,avx512vl")]
3536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3537#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3538pub unsafe fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3539    simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd())
3540}
3541
3542/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3543///
3544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3545#[inline]
3546#[target_feature(enable = "avx512f,avx512vl")]
3547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3548#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3549pub unsafe fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3550    simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c)
3551}
3552
3553/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3554///
3555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
3556#[inline]
3557#[target_feature(enable = "avx512f")]
3558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3559#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3560pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3561    let add = simd_fma(a, b, c);
3562    let sub = simd_fma(a, b, simd_neg(c));
3563    simd_shuffle!(
3564        add,
3565        sub,
3566        [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3567    )
3568}
3569
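// Illustrative usage sketch (not part of the original source, compiled only under
// `cfg(test)`): fmaddsub subtracts `c` in the even-indexed lanes and adds it in the
// odd-indexed lanes, matching the shuffle above. The helper name and operand values
// are assumptions chosen for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmaddsub_ps() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r: [f32; 16] = transmute(_mm512_fmaddsub_ps(a, b, c));
    // Even lanes: 2.0 * 3.0 - 1.0 = 5.0; odd lanes: 2.0 * 3.0 + 1.0 = 7.0.
    assert_eq!(&r[..4], &[5.0, 7.0, 5.0, 7.0]);
}
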
3570/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3571///
3572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
3573#[inline]
3574#[target_feature(enable = "avx512f")]
3575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3576#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3577pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3578    simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a)
3579}
3580
3581/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3582///
3583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
3584#[inline]
3585#[target_feature(enable = "avx512f")]
3586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3587#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3588pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3589    simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps())
3590}
3591
3592/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3593///
3594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
3595#[inline]
3596#[target_feature(enable = "avx512f")]
3597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3598#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3599pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3600    simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c)
3601}
3602
3603/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3604///
3605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
3606#[inline]
3607#[target_feature(enable = "avx512f,avx512vl")]
3608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3609#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3610pub unsafe fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3611    simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a)
3612}
3613
3614/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3615///
3616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
3617#[inline]
3618#[target_feature(enable = "avx512f,avx512vl")]
3619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3620#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3621pub unsafe fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3622    simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps())
3623}
3624
3625/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3626///
3627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
3628#[inline]
3629#[target_feature(enable = "avx512f,avx512vl")]
3630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3631#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3632pub unsafe fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3633    simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c)
3634}
3635
3636/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3637///
3638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
3639#[inline]
3640#[target_feature(enable = "avx512f,avx512vl")]
3641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3642#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3643pub unsafe fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3644    simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a)
3645}
3646
3647/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3648///
3649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
3650#[inline]
3651#[target_feature(enable = "avx512f,avx512vl")]
3652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3653#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3654pub unsafe fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3655    simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps())
3656}
3657
3658/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3659///
3660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
3661#[inline]
3662#[target_feature(enable = "avx512f,avx512vl")]
3663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3664#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3665pub unsafe fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3666    simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c)
3667}
3668
3669/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3670///
3671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
3672#[inline]
3673#[target_feature(enable = "avx512f")]
3674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3675#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3676pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3677    let add = simd_fma(a, b, c);
3678    let sub = simd_fma(a, b, simd_neg(c));
3679    simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
3680}
3681
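// Illustrative usage sketch (not part of the original source, compiled only under
// `cfg(test)`): the double-precision form follows the same even-subtract /
// odd-add pattern across its eight lanes. The helper name and operand values are
// assumptions chosen for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmaddsub_pd() {
    let a = _mm512_set1_pd(2.0);
    let b = _mm512_set1_pd(3.0);
    let c = _mm512_set1_pd(1.0);
    let r: [f64; 8] = transmute(_mm512_fmaddsub_pd(a, b, c));
    assert_eq!(r, [5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0]);
}
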
3682/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3683///
3684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
3685#[inline]
3686#[target_feature(enable = "avx512f")]
3687#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3688#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3689pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3690    simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a)
3691}
3692
3693/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3694///
3695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
3696#[inline]
3697#[target_feature(enable = "avx512f")]
3698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3699#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3700pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3701    simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd())
3702}
3703
3704/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3705///
3706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2601)
3707#[inline]
3708#[target_feature(enable = "avx512f")]
3709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3710#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3711pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3712    simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c)
3713}
3714
3715/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3716///
3717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
3718#[inline]
3719#[target_feature(enable = "avx512f,avx512vl")]
3720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3721#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3722pub unsafe fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3723    simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a)
3724}
3725
3726/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3727///
3728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
3729#[inline]
3730#[target_feature(enable = "avx512f,avx512vl")]
3731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3732#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3733pub unsafe fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3734    simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd())
3735}
3736
3737/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3738///
3739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
3740#[inline]
3741#[target_feature(enable = "avx512f,avx512vl")]
3742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3743#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3744pub unsafe fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3745    simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c)
3746}
3747
3748/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3749///
3750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
3751#[inline]
3752#[target_feature(enable = "avx512f,avx512vl")]
3753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3754#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3755pub unsafe fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3756    simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a)
3757}
3758
3759/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3760///
3761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
3762#[inline]
3763#[target_feature(enable = "avx512f,avx512vl")]
3764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3765#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3766pub unsafe fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3767    simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd())
3768}
3769
3770/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3771///
3772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
3773#[inline]
3774#[target_feature(enable = "avx512f,avx512vl")]
3775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3776#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3777pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
3778    simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c)
3779}
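
// A minimal illustrative sketch (not part of the crate's API or tests), assuming
// `avx512f` and `avx512vl` are available: the 128-bit forms consult only the low two
// bits of the `__mmask8`, one per f64 lane. The helper name
// `mask3_fmaddsub_pd_128_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mask3_fmaddsub_pd_128_demo() {
    let a = _mm_set1_pd(2.0);
    let b = _mm_set1_pd(3.0);
    let c = _mm_set1_pd(1.0);
    // Bit 0 set: lane 0 takes a*b - c = 5.0; bit 1 clear: lane 1 is copied from `c`.
    let r = _mm_mask3_fmaddsub_pd(a, b, c, 0b00000001);
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out, [5.0, 1.0]);
}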
3780
3781/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
3782///
3783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
3784#[inline]
3785#[target_feature(enable = "avx512f")]
3786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3787#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3788pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3789    let add = simd_fma(a, b, c);
3790    let sub = simd_fma(a, b, simd_neg(c));
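    // Interleave the two results: shuffle indices 0..=15 select from `add` (the even
    // destination lanes), indices 16..=31 select from `sub` (the odd destination lanes).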
3791    simd_shuffle!(
3792        add,
3793        sub,
3794        [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
3795    )
3796}
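
// A minimal illustrative sketch (not part of the crate's API or tests), assuming
// `avx512f` is available: fmsubadd mirrors fmaddsub, so even lanes add `c` and odd
// lanes subtract it. The helper name `fmsubadd_ps_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmsubadd_ps_demo() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r = _mm512_fmsubadd_ps(a, b, c);
    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), r);
    // Even lanes: 2.0*3.0 + 1.0 = 7.0; odd lanes: 2.0*3.0 - 1.0 = 5.0.
    for (i, x) in out.iter().enumerate() {
        assert_eq!(*x, if i % 2 == 0 { 7.0 } else { 5.0 });
    }
}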
3797
3798/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3799///
3800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
3801#[inline]
3802#[target_feature(enable = "avx512f")]
3803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3804#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3805pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3806    simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a)
3807}
3808
3809/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3810///
3811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
3812#[inline]
3813#[target_feature(enable = "avx512f")]
3814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3815#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3816pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3817    simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps())
3818}
3819
3820/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3821///
3822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
3823#[inline]
3824#[target_feature(enable = "avx512f")]
3825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3826#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3827pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3828    simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c)
3829}
3830
3831/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3832///
3833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
3834#[inline]
3835#[target_feature(enable = "avx512f,avx512vl")]
3836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3837#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3838pub unsafe fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
3839    simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a)
3840}
3841
3842/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3843///
3844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
3845#[inline]
3846#[target_feature(enable = "avx512f,avx512vl")]
3847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3848#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3849pub unsafe fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
3850    simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps())
3851}
3852
3853/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3854///
3855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
3856#[inline]
3857#[target_feature(enable = "avx512f,avx512vl")]
3858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3859#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3860pub unsafe fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
3861    simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c)
3862}
3863
3864/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3865///
3866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
3867#[inline]
3868#[target_feature(enable = "avx512f,avx512vl")]
3869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3870#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3871pub unsafe fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
3872    simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a)
3873}
3874
3875/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3876///
3877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
3878#[inline]
3879#[target_feature(enable = "avx512f,avx512vl")]
3880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3881#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3882pub unsafe fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
3883    simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps())
3884}
3885
3886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3887///
3888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
3889#[inline]
3890#[target_feature(enable = "avx512f,avx512vl")]
3891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3892#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3893pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
3894    simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c)
3895}
3896
3897/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
3898///
3899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
3900#[inline]
3901#[target_feature(enable = "avx512f")]
3902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3903#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3904pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3905    let add = simd_fma(a, b, c);
3906    let sub = simd_fma(a, b, simd_neg(c));
3907    simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
3908}
3909
3910/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3911///
3912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
3913#[inline]
3914#[target_feature(enable = "avx512f")]
3915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3916#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3917pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
3918    simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a)
3919}
3920
3921/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3922///
3923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
3924#[inline]
3925#[target_feature(enable = "avx512f")]
3926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3927#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3928pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
3929    simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd())
3930}
3931
3932/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3933///
3934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
3935#[inline]
3936#[target_feature(enable = "avx512f")]
3937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3938#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3939pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
3940    simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c)
3941}
3942
3943/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3944///
3945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
3946#[inline]
3947#[target_feature(enable = "avx512f,avx512vl")]
3948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3949#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3950pub unsafe fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
3951    simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a)
3952}
3953
3954/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3955///
3956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
3957#[inline]
3958#[target_feature(enable = "avx512f,avx512vl")]
3959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3960#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3961pub unsafe fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
3962    simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd())
3963}
3964
3965/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3966///
3967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
3968#[inline]
3969#[target_feature(enable = "avx512f,avx512vl")]
3970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3971#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3972pub unsafe fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
3973    simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c)
3974}
3975
3976/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3977///
3978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
3979#[inline]
3980#[target_feature(enable = "avx512f,avx512vl")]
3981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3982#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3983pub unsafe fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
3984    simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a)
3985}
3986
3987/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3988///
3989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
3990#[inline]
3991#[target_feature(enable = "avx512f,avx512vl")]
3992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3993#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3994pub unsafe fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
3995    simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd())
3996}
3997
3998/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3999///
4000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4001#[inline]
4002#[target_feature(enable = "avx512f,avx512vl")]
4003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4004#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4005pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4006    simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c)
4007}
4008
4009/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4010///
4011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4012#[inline]
4013#[target_feature(enable = "avx512f")]
4014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4015#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4016pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4017    simd_fma(simd_neg(a), b, c)
4018}
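
// A minimal illustrative sketch (not part of the crate's API or tests), assuming
// `avx512f` is available: fnmadd computes -(a * b) + c in every lane with a single
// rounding. The helper name `fnmadd_ps_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmadd_ps_demo() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r = _mm512_fnmadd_ps(a, b, c);
    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), r);
    // Every lane: -(2.0 * 3.0) + 1.0 = -5.0.
    assert!(out.iter().all(|&x| x == -5.0));
}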
4019
4020/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4021///
4022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4023#[inline]
4024#[target_feature(enable = "avx512f")]
4025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4026#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4027pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4028    simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a)
4029}
4030
4031/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4032///
4033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4034#[inline]
4035#[target_feature(enable = "avx512f")]
4036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4037#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4038pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4039    simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps())
4040}
4041
4042/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4043///
4044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4045#[inline]
4046#[target_feature(enable = "avx512f")]
4047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4048#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4049pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4050    simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c)
4051}
4052
4053/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4054///
4055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4056#[inline]
4057#[target_feature(enable = "avx512f,avx512vl")]
4058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4059#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4060pub unsafe fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4061    simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a)
4062}
4063
4064/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4065///
4066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4067#[inline]
4068#[target_feature(enable = "avx512f,avx512vl")]
4069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4070#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4071pub unsafe fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4072    simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps())
4073}
4074
4075/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4076///
4077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4078#[inline]
4079#[target_feature(enable = "avx512f,avx512vl")]
4080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4081#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4082pub unsafe fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4083    simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c)
4084}
4085
4086/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4087///
4088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4089#[inline]
4090#[target_feature(enable = "avx512f,avx512vl")]
4091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4092#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4093pub unsafe fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4094    simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a)
4095}
4096
4097/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4098///
4099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4100#[inline]
4101#[target_feature(enable = "avx512f,avx512vl")]
4102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4103#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4104pub unsafe fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4105    simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps())
4106}
4107
4108/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4109///
4110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4111#[inline]
4112#[target_feature(enable = "avx512f,avx512vl")]
4113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4114#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4115pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4116    simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c)
4117}
4118
4119/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4120///
4121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4122#[inline]
4123#[target_feature(enable = "avx512f")]
4124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4125#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4126pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4127    simd_fma(simd_neg(a), b, c)
4128}
4129
4130/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4131///
4132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4133#[inline]
4134#[target_feature(enable = "avx512f")]
4135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4136#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4137pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4138    simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a)
4139}
4140
4141/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4142///
4143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4144#[inline]
4145#[target_feature(enable = "avx512f")]
4146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4147#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4148pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4149    simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd())
4150}
4151
4152/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4153///
4154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4155#[inline]
4156#[target_feature(enable = "avx512f")]
4157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4158#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4159pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4160    simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c)
4161}
4162
4163/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4164///
4165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4166#[inline]
4167#[target_feature(enable = "avx512f,avx512vl")]
4168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4169#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4170pub unsafe fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4171    simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a)
4172}
4173
4174/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4175///
4176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4177#[inline]
4178#[target_feature(enable = "avx512f,avx512vl")]
4179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4180#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4181pub unsafe fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4182    simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd())
4183}
4184
4185/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4186///
4187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4188#[inline]
4189#[target_feature(enable = "avx512f,avx512vl")]
4190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4191#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4192pub unsafe fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4193    simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c)
4194}
4195
4196/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4197///
4198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4199#[inline]
4200#[target_feature(enable = "avx512f,avx512vl")]
4201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4202#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4203pub unsafe fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4204    simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a)
4205}
4206
4207/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4208///
4209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4210#[inline]
4211#[target_feature(enable = "avx512f,avx512vl")]
4212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4213#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4214pub unsafe fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4215    simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd())
4216}
4217
4218/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4219///
4220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4221#[inline]
4222#[target_feature(enable = "avx512f,avx512vl")]
4223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4224#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4225pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4226    simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c)
4227}
4228
4229/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4230///
4231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
4232#[inline]
4233#[target_feature(enable = "avx512f")]
4234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4235#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4236pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4237    simd_fma(simd_neg(a), b, simd_neg(c))
4238}
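
// A minimal illustrative sketch (not part of the crate's API or tests), assuming
// `avx512f` is available: fnmsub computes -(a * b) - c in every lane. The helper
// name `fnmsub_ps_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmsub_ps_demo() {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let r = _mm512_fnmsub_ps(a, b, c);
    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), r);
    // Every lane: -(2.0 * 3.0) - 1.0 = -7.0.
    assert!(out.iter().all(|&x| x == -7.0));
}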
4239
4240/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4241///
4242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4243#[inline]
4244#[target_feature(enable = "avx512f")]
4245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4246#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4247pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4248    simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a)
4249}
4250
4251/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4252///
4253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4254#[inline]
4255#[target_feature(enable = "avx512f")]
4256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4257#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4258pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4259    simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps())
4260}
4261
4262/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4263///
4264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4265#[inline]
4266#[target_feature(enable = "avx512f")]
4267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4268#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4269pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4270    simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c)
4271}
4272
4273/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4274///
4275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4276#[inline]
4277#[target_feature(enable = "avx512f,avx512vl")]
4278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4279#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4280pub unsafe fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4281    simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a)
4282}
4283
4284/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4285///
4286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4287#[inline]
4288#[target_feature(enable = "avx512f,avx512vl")]
4289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4290#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4291pub unsafe fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4292    simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps())
4293}
4294
4295/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4296///
4297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4298#[inline]
4299#[target_feature(enable = "avx512f,avx512vl")]
4300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4301#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4302pub unsafe fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4303    simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c)
4304}
4305
4306/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4307///
4308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4309#[inline]
4310#[target_feature(enable = "avx512f,avx512vl")]
4311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4312#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4313pub unsafe fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4314    simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a)
4315}
4316
4317/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4318///
4319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4320#[inline]
4321#[target_feature(enable = "avx512f,avx512vl")]
4322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4323#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4324pub unsafe fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4325    simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps())
4326}
4327
4328/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4329///
4330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4331#[inline]
4332#[target_feature(enable = "avx512f,avx512vl")]
4333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4334#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4335pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4336    simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c)
4337}
4338
4339/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4340///
4341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4342#[inline]
4343#[target_feature(enable = "avx512f")]
4344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4345#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4346pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4347    simd_fma(simd_neg(a), b, simd_neg(c))
4348}
4349
4350/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4351///
4352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4353#[inline]
4354#[target_feature(enable = "avx512f")]
4355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4356#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4357pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4358    simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a)
4359}
4360
4361/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4362///
4363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4364#[inline]
4365#[target_feature(enable = "avx512f")]
4366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4367#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4368pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4369    simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd())
4370}
4371
4372/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4373///
4374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4375#[inline]
4376#[target_feature(enable = "avx512f")]
4377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4378#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4379pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4380    simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c)
4381}
4382
4383/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4384///
4385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4386#[inline]
4387#[target_feature(enable = "avx512f,avx512vl")]
4388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4389#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4390pub unsafe fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4391    simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a)
4392}
4393
4394/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4395///
4396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4397#[inline]
4398#[target_feature(enable = "avx512f,avx512vl")]
4399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4400#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4401pub unsafe fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4402    simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd())
4403}
4404
4405/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4406///
4407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4408#[inline]
4409#[target_feature(enable = "avx512f,avx512vl")]
4410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4411#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4412pub unsafe fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4413    simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c)
4414}
4415
4416/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4417///
4418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4419#[inline]
4420#[target_feature(enable = "avx512f,avx512vl")]
4421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4422#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4423pub unsafe fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4424    simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a)
4425}
4426
4427/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4428///
4429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4430#[inline]
4431#[target_feature(enable = "avx512f,avx512vl")]
4432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4433#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4434pub unsafe fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4435    simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd())
4436}
4437
4438/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4439///
4440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4441#[inline]
4442#[target_feature(enable = "avx512f,avx512vl")]
4443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4444#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4445pub unsafe fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4446    simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c)
4447}
4448
4449/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4450///
4451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4452#[inline]
4453#[target_feature(enable = "avx512f")]
4454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4455#[cfg_attr(test, assert_instr(vrcp14ps))]
4456pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 {
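    // The unmasked form passes an all-ones mask, so the zero `src` operand is never
    // selected and every lane receives the approximation.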
4457    transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111))
4458}
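
// A minimal illustrative sketch (not part of the crate's API or tests), assuming
// `avx512f` is available: vrcp14ps only approximates the reciprocal, so the check
// below uses the documented 2^-14 relative-error bound rather than exact equality.
// The helper name `rcp14_ps_demo` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rcp14_ps_demo() {
    let a = _mm512_set1_ps(3.0);
    let r = _mm512_rcp14_ps(a);
    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), r);
    let exact = 1.0f32 / 3.0;
    let bound = exact * (1.0 / 16384.0); // 2^-14 relative error
    assert!(out.iter().all(|&x| x >= exact - bound && x <= exact + bound));
}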
4459
4460/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4461///
4462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4463#[inline]
4464#[target_feature(enable = "avx512f")]
4465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4466#[cfg_attr(test, assert_instr(vrcp14ps))]
4467pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4468    transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k))
4469}
4470
4471/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4472///
4473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4474#[inline]
4475#[target_feature(enable = "avx512f")]
4476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4477#[cfg_attr(test, assert_instr(vrcp14ps))]
4478pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
4479    transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k))
4480}
4481
4482/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4483///
4484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4485#[inline]
4486#[target_feature(enable = "avx512f,avx512vl")]
4487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4488#[cfg_attr(test, assert_instr(vrcp14ps))]
4489pub unsafe fn _mm256_rcp14_ps(a: __m256) -> __m256 {
4490    transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111))
4491}
4492
4493/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4494///
4495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4496#[inline]
4497#[target_feature(enable = "avx512f,avx512vl")]
4498#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4499#[cfg_attr(test, assert_instr(vrcp14ps))]
4500pub unsafe fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4501    transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k))
4502}
4503
4504/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4505///
4506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4507#[inline]
4508#[target_feature(enable = "avx512f,avx512vl")]
4509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4510#[cfg_attr(test, assert_instr(vrcp14ps))]
4511pub unsafe fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
4512    transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k))
4513}
4514
4515/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4516///
4517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4518#[inline]
4519#[target_feature(enable = "avx512f,avx512vl")]
4520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4521#[cfg_attr(test, assert_instr(vrcp14ps))]
4522pub unsafe fn _mm_rcp14_ps(a: __m128) -> __m128 {
4523    transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111))
4524}
4525
4526/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4527///
4528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4529#[inline]
4530#[target_feature(enable = "avx512f,avx512vl")]
4531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4532#[cfg_attr(test, assert_instr(vrcp14ps))]
4533pub unsafe fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4534    transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k))
4535}
4536
4537/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4538///
4539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4540#[inline]
4541#[target_feature(enable = "avx512f,avx512vl")]
4542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4543#[cfg_attr(test, assert_instr(vrcp14ps))]
4544pub unsafe fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
4545    transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k))
4546}
4547
4548/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4549///
4550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
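///
/// An illustrative usage sketch (values chosen purely for demonstration,
/// assuming `avx512f` is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(8.0);
///     // Every lane approximates 1.0 / 8.0 = 0.125, with relative error below 2^-14.
///     let r = _mm512_rcp14_pd(a);
/// }
/// ```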
4551#[inline]
4552#[target_feature(enable = "avx512f")]
4553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4554#[cfg_attr(test, assert_instr(vrcp14pd))]
4555pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
4556    transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111))
4557}
4558
4559/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4560///
4561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
4562#[inline]
4563#[target_feature(enable = "avx512f")]
4564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4565#[cfg_attr(test, assert_instr(vrcp14pd))]
4566pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
4567    transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k))
4568}
4569
4570/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4571///
4572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
4573#[inline]
4574#[target_feature(enable = "avx512f")]
4575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4576#[cfg_attr(test, assert_instr(vrcp14pd))]
4577pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
4578    transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k))
4579}
4580
4581/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
4584#[inline]
4585#[target_feature(enable = "avx512f,avx512vl")]
4586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4587#[cfg_attr(test, assert_instr(vrcp14pd))]
4588pub unsafe fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
4589    transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111))
4590}
4591
4592/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4593///
4594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
4595#[inline]
4596#[target_feature(enable = "avx512f,avx512vl")]
4597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4598#[cfg_attr(test, assert_instr(vrcp14pd))]
4599pub unsafe fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
4600    transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k))
4601}
4602
4603/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4604///
4605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
4606#[inline]
4607#[target_feature(enable = "avx512f,avx512vl")]
4608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4609#[cfg_attr(test, assert_instr(vrcp14pd))]
4610pub unsafe fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
4611    transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k))
4612}
4613
4614/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4615///
4616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
4617#[inline]
4618#[target_feature(enable = "avx512f,avx512vl")]
4619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4620#[cfg_attr(test, assert_instr(vrcp14pd))]
4621pub unsafe fn _mm_rcp14_pd(a: __m128d) -> __m128d {
4622    transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011))
4623}
4624
4625/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4626///
4627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
4628#[inline]
4629#[target_feature(enable = "avx512f,avx512vl")]
4630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4631#[cfg_attr(test, assert_instr(vrcp14pd))]
4632pub unsafe fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
4633    transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k))
4634}
4635
4636/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4637///
4638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
4639#[inline]
4640#[target_feature(enable = "avx512f,avx512vl")]
4641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4642#[cfg_attr(test, assert_instr(vrcp14pd))]
4643pub unsafe fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
4644    transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k))
4645}
4646
4647/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4648///
4649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
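///
/// An illustrative usage sketch (assuming `avx512f` is enabled for the calling
/// code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(4.0);
///     // Every lane approximates 1.0 / sqrt(4.0) = 0.5, with relative error below 2^-14.
///     let r = _mm512_rsqrt14_ps(a);
/// }
/// ```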
4650#[inline]
4651#[target_feature(enable = "avx512f")]
4652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4653#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4654pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
4655    transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111))
4656}
4657
4658/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4659///
4660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
4661#[inline]
4662#[target_feature(enable = "avx512f")]
4663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4664#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4665pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4666    transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k))
4667}
4668
4669/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4670///
4671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
4672#[inline]
4673#[target_feature(enable = "avx512f")]
4674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4675#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4676pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
4677    transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k))
4678}
4679
4680/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4681///
4682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
4683#[inline]
4684#[target_feature(enable = "avx512f,avx512vl")]
4685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4686#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4687pub unsafe fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
4688    transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111))
4689}
4690
4691/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4692///
4693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
4694#[inline]
4695#[target_feature(enable = "avx512f,avx512vl")]
4696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4697#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4698pub unsafe fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4699    transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k))
4700}
4701
4702/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4703///
4704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
4705#[inline]
4706#[target_feature(enable = "avx512f,avx512vl")]
4707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4708#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4709pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
4710    transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k))
4711}
4712
4713/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4714///
4715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
4716#[inline]
4717#[target_feature(enable = "avx512f,avx512vl")]
4718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4719#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4720pub unsafe fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
4721    transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111))
4722}
4723
4724/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4725///
4726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
4727#[inline]
4728#[target_feature(enable = "avx512f,avx512vl")]
4729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4730#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4731pub unsafe fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4732    transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k))
4733}
4734
4735/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4736///
4737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
4738#[inline]
4739#[target_feature(enable = "avx512f,avx512vl")]
4740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4741#[cfg_attr(test, assert_instr(vrsqrt14ps))]
4742pub unsafe fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
4743    transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k))
4744}
4745
4746/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4747///
4748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
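///
/// An illustrative usage sketch (assuming `avx512f` is enabled for the calling
/// code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(16.0);
///     // Every lane approximates 1.0 / sqrt(16.0) = 0.25, with relative error below 2^-14.
///     let r = _mm512_rsqrt14_pd(a);
/// }
/// ```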
4749#[inline]
4750#[target_feature(enable = "avx512f")]
4751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4752#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4753pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
4754    transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111))
4755}
4756
4757/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4758///
4759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
4760#[inline]
4761#[target_feature(enable = "avx512f")]
4762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4763#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4764pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
4765    transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k))
4766}
4767
4768/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4769///
4770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
4771#[inline]
4772#[target_feature(enable = "avx512f")]
4773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4774#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4775pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
4776    transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k))
4777}
4778
4779/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4780///
4781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
4782#[inline]
4783#[target_feature(enable = "avx512f,avx512vl")]
4784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4785#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4786pub unsafe fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
4787    transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111))
4788}
4789
4790/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4791///
4792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
4793#[inline]
4794#[target_feature(enable = "avx512f,avx512vl")]
4795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4796#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4797pub unsafe fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
4798    transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k))
4799}
4800
4801/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4802///
4803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
4804#[inline]
4805#[target_feature(enable = "avx512f,avx512vl")]
4806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4807#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4808pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
4809    transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k))
4810}
4811
4812/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4813///
4814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
4815#[inline]
4816#[target_feature(enable = "avx512f,avx512vl")]
4817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4818#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4819pub unsafe fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
4820    transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011))
4821}
4822
4823/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4824///
4825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
4826#[inline]
4827#[target_feature(enable = "avx512f,avx512vl")]
4828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4829#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4830pub unsafe fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
4831    transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k))
4832}
4833
4834/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4835///
4836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
4837#[inline]
4838#[target_feature(enable = "avx512f,avx512vl")]
4839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4840#[cfg_attr(test, assert_instr(vrsqrt14pd))]
4841pub unsafe fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
4842    transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k))
4843}
4844
4845/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
4846///
4847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
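///
/// An illustrative usage sketch (assuming `avx512f` is enabled for the calling
/// code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(8.5);
///     // floor(log2(8.5)) = 3, so every lane of the result is 3.0.
///     let r = _mm512_getexp_ps(a);
/// }
/// ```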
4848#[inline]
4849#[target_feature(enable = "avx512f")]
4850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4851#[cfg_attr(test, assert_instr(vgetexpps))]
4852pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 {
4853    transmute(vgetexpps(
4854        a.as_f32x16(),
4855        f32x16::ZERO,
4856        0b11111111_11111111,
4857        _MM_FROUND_CUR_DIRECTION,
4858    ))
4859}
4860
4861/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4862///
4863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
4864#[inline]
4865#[target_feature(enable = "avx512f")]
4866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4867#[cfg_attr(test, assert_instr(vgetexpps))]
4868pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4869    transmute(vgetexpps(
4870        a.as_f32x16(),
4871        src.as_f32x16(),
4872        k,
4873        _MM_FROUND_CUR_DIRECTION,
4874    ))
4875}
4876
4877/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
4880#[inline]
4881#[target_feature(enable = "avx512f")]
4882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4883#[cfg_attr(test, assert_instr(vgetexpps))]
4884pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
4885    transmute(vgetexpps(
4886        a.as_f32x16(),
4887        f32x16::ZERO,
4888        k,
4889        _MM_FROUND_CUR_DIRECTION,
4890    ))
4891}
4892
4893/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
4894///
4895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
4896#[inline]
4897#[target_feature(enable = "avx512f,avx512vl")]
4898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4899#[cfg_attr(test, assert_instr(vgetexpps))]
4900pub unsafe fn _mm256_getexp_ps(a: __m256) -> __m256 {
4901    transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111))
4902}
4903
4904/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4905///
4906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
4907#[inline]
4908#[target_feature(enable = "avx512f,avx512vl")]
4909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4910#[cfg_attr(test, assert_instr(vgetexpps))]
4911pub unsafe fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4912    transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k))
4913}
4914
4915/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4916///
4917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
4918#[inline]
4919#[target_feature(enable = "avx512f,avx512vl")]
4920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4921#[cfg_attr(test, assert_instr(vgetexpps))]
4922pub unsafe fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
4923    transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k))
4924}
4925
4926/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
4927///
4928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
4929#[inline]
4930#[target_feature(enable = "avx512f,avx512vl")]
4931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4932#[cfg_attr(test, assert_instr(vgetexpps))]
4933pub unsafe fn _mm_getexp_ps(a: __m128) -> __m128 {
4934    transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111))
4935}
4936
4937/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4938///
4939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
4940#[inline]
4941#[target_feature(enable = "avx512f,avx512vl")]
4942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4943#[cfg_attr(test, assert_instr(vgetexpps))]
4944pub unsafe fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4945    transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k))
4946}
4947
4948/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4949///
4950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
4951#[inline]
4952#[target_feature(enable = "avx512f,avx512vl")]
4953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4954#[cfg_attr(test, assert_instr(vgetexpps))]
4955pub unsafe fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
4956    transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k))
4957}
4958
4959/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
4960///
4961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
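///
/// An illustrative usage sketch (assuming `avx512f` is enabled for the calling
/// code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(0.5);
///     // floor(log2(0.5)) = -1, so every lane of the result is -1.0.
///     let r = _mm512_getexp_pd(a);
/// }
/// ```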
4962#[inline]
4963#[target_feature(enable = "avx512f")]
4964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4965#[cfg_attr(test, assert_instr(vgetexppd))]
4966pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d {
4967    transmute(vgetexppd(
4968        a.as_f64x8(),
4969        f64x8::ZERO,
4970        0b11111111,
4971        _MM_FROUND_CUR_DIRECTION,
4972    ))
4973}
4974
4975/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4976///
4977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
4978#[inline]
4979#[target_feature(enable = "avx512f")]
4980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4981#[cfg_attr(test, assert_instr(vgetexppd))]
4982pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
4983    transmute(vgetexppd(
4984        a.as_f64x8(),
4985        src.as_f64x8(),
4986        k,
4987        _MM_FROUND_CUR_DIRECTION,
4988    ))
4989}
4990
4991/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4992///
4993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
4994#[inline]
4995#[target_feature(enable = "avx512f")]
4996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4997#[cfg_attr(test, assert_instr(vgetexppd))]
4998pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
4999    transmute(vgetexppd(
5000        a.as_f64x8(),
5001        f64x8::ZERO,
5002        k,
5003        _MM_FROUND_CUR_DIRECTION,
5004    ))
5005}
5006
5007/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5008///
5009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5010#[inline]
5011#[target_feature(enable = "avx512f,avx512vl")]
5012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5013#[cfg_attr(test, assert_instr(vgetexppd))]
5014pub unsafe fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5015    transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111))
5016}
5017
5018/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5019///
5020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5021#[inline]
5022#[target_feature(enable = "avx512f,avx512vl")]
5023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5024#[cfg_attr(test, assert_instr(vgetexppd))]
5025pub unsafe fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5026    transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k))
5027}
5028
5029/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5030///
5031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5032#[inline]
5033#[target_feature(enable = "avx512f,avx512vl")]
5034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5035#[cfg_attr(test, assert_instr(vgetexppd))]
5036pub unsafe fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5037    transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k))
5038}
5039
5040/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5041///
5042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5043#[inline]
5044#[target_feature(enable = "avx512f,avx512vl")]
5045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5046#[cfg_attr(test, assert_instr(vgetexppd))]
5047pub unsafe fn _mm_getexp_pd(a: __m128d) -> __m128d {
5048    transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011))
5049}
5050
5051/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5052///
5053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5054#[inline]
5055#[target_feature(enable = "avx512f,avx512vl")]
5056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5057#[cfg_attr(test, assert_instr(vgetexppd))]
5058pub unsafe fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5059    transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k))
5060}
5061
5062/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5063///
5064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5065#[inline]
5066#[target_feature(enable = "avx512f,avx512vl")]
5067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5068#[cfg_attr(test, assert_instr(vgetexppd))]
5069pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5070    transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k))
5071}
5072
5073/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5074/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5075/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5076/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5077/// * [`_MM_FROUND_TO_POS_INF`] : round up
5078/// * [`_MM_FROUND_TO_ZERO`] : truncate
5079/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5080///
5081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
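///
/// An illustrative usage sketch (assuming `avx512f` is enabled for the calling
/// code). Bits imm8\[7:4\] hold the number of fraction bits to keep, so
/// `IMM8 = 0` rounds to whole numbers with the default round-to-nearest mode:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(2.75);
///     // 0 fraction bits + round to nearest: every lane becomes 3.0.
///     let r = _mm512_roundscale_ps::<0>(a);
/// }
/// ```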
5082#[inline]
5083#[target_feature(enable = "avx512f")]
5084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5085#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5086#[rustc_legacy_const_generics(1)]
5087pub unsafe fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5088    static_assert_uimm_bits!(IMM8, 8);
5089    let a = a.as_f32x16();
5090    let r = vrndscaleps(
5091        a,
5092        IMM8,
5093        f32x16::ZERO,
5094        0b11111111_11111111,
5095        _MM_FROUND_CUR_DIRECTION,
5096    );
5097    transmute(r)
5098}
5099
5100/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5101/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5102/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5103/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5104/// * [`_MM_FROUND_TO_POS_INF`] : round up
5105/// * [`_MM_FROUND_TO_ZERO`] : truncate
5106/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5107///
5108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
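///
/// A sketch of the writemask behavior (illustrative values, assuming `avx512f`
/// is enabled for the calling code):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm512_set1_ps(10.0);
///     let a = _mm512_set1_ps(1.3);
///     // IMM8 = 0 rounds to whole numbers; the eight lanes selected by the
///     // mask become 1.0, the rest keep 10.0 copied from `src`.
///     let r = _mm512_mask_roundscale_ps::<0>(src, 0b00000000_11111111, a);
/// }
/// ```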
5109#[inline]
5110#[target_feature(enable = "avx512f")]
5111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5112#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5113#[rustc_legacy_const_generics(3)]
5114pub unsafe fn _mm512_mask_roundscale_ps<const IMM8: i32>(
5115    src: __m512,
5116    k: __mmask16,
5117    a: __m512,
5118) -> __m512 {
5119    static_assert_uimm_bits!(IMM8, 8);
5120    let a = a.as_f32x16();
5121    let src = src.as_f32x16();
5122    let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5123    transmute(r)
5124}
5125
5126/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5127/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5128/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5129/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5130/// * [`_MM_FROUND_TO_POS_INF`] : round up
5131/// * [`_MM_FROUND_TO_ZERO`] : truncate
5132/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5133///
5134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5135#[inline]
5136#[target_feature(enable = "avx512f")]
5137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5138#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5139#[rustc_legacy_const_generics(2)]
5140pub unsafe fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5141    static_assert_uimm_bits!(IMM8, 8);
5142    let a = a.as_f32x16();
5143    let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5144    transmute(r)
5145}
5146
5147/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5148/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5149/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5150/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5151/// * [`_MM_FROUND_TO_POS_INF`] : round up
5152/// * [`_MM_FROUND_TO_ZERO`] : truncate
5153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5154///
5155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5156#[inline]
5157#[target_feature(enable = "avx512f,avx512vl")]
5158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5159#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5160#[rustc_legacy_const_generics(1)]
5161pub unsafe fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5162    static_assert_uimm_bits!(IMM8, 8);
5163    let a = a.as_f32x8();
5164    let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5165    transmute(r)
5166}
5167
5168/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5169/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5170/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5171/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5172/// * [`_MM_FROUND_TO_POS_INF`] : round up
5173/// * [`_MM_FROUND_TO_ZERO`] : truncate
5174/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5180#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5181#[rustc_legacy_const_generics(3)]
5182pub unsafe fn _mm256_mask_roundscale_ps<const IMM8: i32>(
5183    src: __m256,
5184    k: __mmask8,
5185    a: __m256,
5186) -> __m256 {
5187    static_assert_uimm_bits!(IMM8, 8);
5188    let a = a.as_f32x8();
5189    let src = src.as_f32x8();
5190    let r = vrndscaleps256(a, IMM8, src, k);
5191    transmute(r)
5192}
5193
5194/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5195/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5196/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5197/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5198/// * [`_MM_FROUND_TO_POS_INF`] : round up
5199/// * [`_MM_FROUND_TO_ZERO`] : truncate
5200/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5201///
5202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5203#[inline]
5204#[target_feature(enable = "avx512f,avx512vl")]
5205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5206#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5207#[rustc_legacy_const_generics(2)]
5208pub unsafe fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5209    static_assert_uimm_bits!(IMM8, 8);
5210    let a = a.as_f32x8();
5211    let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5212    transmute(r)
5213}
5214
5215/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5216/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5217/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5218/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5219/// * [`_MM_FROUND_TO_POS_INF`] : round up
5220/// * [`_MM_FROUND_TO_ZERO`] : truncate
5221/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5222///
5223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5224#[inline]
5225#[target_feature(enable = "avx512f,avx512vl")]
5226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5227#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5228#[rustc_legacy_const_generics(1)]
5229pub unsafe fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5230    static_assert_uimm_bits!(IMM8, 8);
5231    let a = a.as_f32x4();
5232    let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5233    transmute(r)
5234}
5235
5236/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5240/// * [`_MM_FROUND_TO_POS_INF`] : round up
5241/// * [`_MM_FROUND_TO_ZERO`] : truncate
5242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5243///
5244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5245#[inline]
5246#[target_feature(enable = "avx512f,avx512vl")]
5247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5248#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5249#[rustc_legacy_const_generics(3)]
5250pub unsafe fn _mm_mask_roundscale_ps<const IMM8: i32>(
5251    src: __m128,
5252    k: __mmask8,
5253    a: __m128,
5254) -> __m128 {
5255    static_assert_uimm_bits!(IMM8, 8);
5256    let a = a.as_f32x4();
5257    let src = src.as_f32x4();
5258    let r = vrndscaleps128(a, IMM8, src, k);
5259    transmute(r)
5260}
5261
5262/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5263/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5264/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5265/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5266/// * [`_MM_FROUND_TO_POS_INF`] : round up
5267/// * [`_MM_FROUND_TO_ZERO`] : truncate
5268/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5269///
5270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5271#[inline]
5272#[target_feature(enable = "avx512f,avx512vl")]
5273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5274#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5275#[rustc_legacy_const_generics(2)]
5276pub unsafe fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5277    static_assert_uimm_bits!(IMM8, 8);
5278    let a = a.as_f32x4();
5279    let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5280    transmute(r)
5281}
5282
5283/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5284/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5285/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5286/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5287/// * [`_MM_FROUND_TO_POS_INF`] : round up
5288/// * [`_MM_FROUND_TO_ZERO`] : truncate
5289/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5290///
5291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
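///
/// An illustrative usage sketch (assuming `avx512f` is enabled for the calling
/// code). Bits imm8\[7:4\] select how many fraction bits to keep:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(2.3);
///     // Keep one fraction bit (imm8[7:4] = 1) and round to nearest:
///     // 2.3 * 2 = 4.6 rounds to 5, and 5 / 2 = 2.5 in every lane.
///     let r = _mm512_roundscale_pd::<0b0001_0000>(a);
/// }
/// ```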
5292#[inline]
5293#[target_feature(enable = "avx512f")]
5294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5295#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5296#[rustc_legacy_const_generics(1)]
5297pub unsafe fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5298    static_assert_uimm_bits!(IMM8, 8);
5299    let a = a.as_f64x8();
5300    let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5301    transmute(r)
5302}
5303
5304/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5305/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5306/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5307/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5308/// * [`_MM_FROUND_TO_POS_INF`] : round up
5309/// * [`_MM_FROUND_TO_ZERO`] : truncate
5310/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5311///
5312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5313#[inline]
5314#[target_feature(enable = "avx512f")]
5315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5316#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5317#[rustc_legacy_const_generics(3)]
5318pub unsafe fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5319    src: __m512d,
5320    k: __mmask8,
5321    a: __m512d,
5322) -> __m512d {
5323    static_assert_uimm_bits!(IMM8, 8);
5324    let a = a.as_f64x8();
5325    let src = src.as_f64x8();
5326    let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5327    transmute(r)
5328}
5329
5330/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5331/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5332/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5333/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5334/// * [`_MM_FROUND_TO_POS_INF`] : round up
5335/// * [`_MM_FROUND_TO_ZERO`] : truncate
5336/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5337///
5338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5339#[inline]
5340#[target_feature(enable = "avx512f")]
5341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5342#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5343#[rustc_legacy_const_generics(2)]
5344pub unsafe fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5345    static_assert_uimm_bits!(IMM8, 8);
5346    let a = a.as_f64x8();
5347    let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5348    transmute(r)
5349}
5350
5351/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5352/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5353/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5354/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5355/// * [`_MM_FROUND_TO_POS_INF`] : round up
5356/// * [`_MM_FROUND_TO_ZERO`] : truncate
5357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5358///
5359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5360#[inline]
5361#[target_feature(enable = "avx512f,avx512vl")]
5362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5363#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5364#[rustc_legacy_const_generics(1)]
5365pub unsafe fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5366    static_assert_uimm_bits!(IMM8, 8);
5367    let a = a.as_f64x4();
5368    let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5369    transmute(r)
5370}
5371
5372/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5373/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5374/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5375/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5376/// * [`_MM_FROUND_TO_POS_INF`] : round up
5377/// * [`_MM_FROUND_TO_ZERO`] : truncate
5378/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5379///
5380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5381#[inline]
5382#[target_feature(enable = "avx512f,avx512vl")]
5383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5384#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5385#[rustc_legacy_const_generics(3)]
5386pub unsafe fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5387    src: __m256d,
5388    k: __mmask8,
5389    a: __m256d,
5390) -> __m256d {
5391    static_assert_uimm_bits!(IMM8, 8);
5392    let a = a.as_f64x4();
5393    let src = src.as_f64x4();
5394    let r = vrndscalepd256(a, IMM8, src, k);
5395    transmute(r)
5396}
5397
5398/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5399/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5400/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5401/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5402/// * [`_MM_FROUND_TO_POS_INF`] : round up
5403/// * [`_MM_FROUND_TO_ZERO`] : truncate
5404/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5407#[inline]
5408#[target_feature(enable = "avx512f,avx512vl")]
5409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5410#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5411#[rustc_legacy_const_generics(2)]
5412pub unsafe fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5413    static_assert_uimm_bits!(IMM8, 8);
5414    let a = a.as_f64x4();
5415    let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5416    transmute(r)
5417}
5418
5419/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5420/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5421/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5422/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5423/// * [`_MM_FROUND_TO_POS_INF`] : round up
5424/// * [`_MM_FROUND_TO_ZERO`] : truncate
5425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5426///
5427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5428#[inline]
5429#[target_feature(enable = "avx512f,avx512vl")]
5430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5431#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5432#[rustc_legacy_const_generics(1)]
5433pub unsafe fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5434    static_assert_uimm_bits!(IMM8, 8);
5435    let a = a.as_f64x2();
5436    let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5437    transmute(r)
5438}
5439
5440/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5441/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5442/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5443/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5444/// * [`_MM_FROUND_TO_POS_INF`] : round up
5445/// * [`_MM_FROUND_TO_ZERO`] : truncate
5446/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5447///
5448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5449#[inline]
5450#[target_feature(enable = "avx512f,avx512vl")]
5451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5452#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5453#[rustc_legacy_const_generics(3)]
5454pub unsafe fn _mm_mask_roundscale_pd<const IMM8: i32>(
5455    src: __m128d,
5456    k: __mmask8,
5457    a: __m128d,
5458) -> __m128d {
5459    static_assert_uimm_bits!(IMM8, 8);
5460    let a = a.as_f64x2();
5461    let src = src.as_f64x2();
5462    let r = vrndscalepd128(a, IMM8, src, k);
5463    transmute(r)
5464}
5465
5466/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5467/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5468/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5469/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5470/// * [`_MM_FROUND_TO_POS_INF`] : round up
5471/// * [`_MM_FROUND_TO_ZERO`] : truncate
5472/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5473///
5474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5475#[inline]
5476#[target_feature(enable = "avx512f,avx512vl")]
5477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5478#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5479#[rustc_legacy_const_generics(2)]
5480pub unsafe fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5481    static_assert_uimm_bits!(IMM8, 8);
5482    let a = a.as_f64x2();
5483    let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5484    transmute(r)
5485}
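
// Illustrative sketch (assumption, test-only): zero-masking combined with
// ROUNDSCALE. Lanes whose mask bit is set are truncated to whole numbers
// (`_MM_FROUND_TO_ZERO` with M = 0 fraction bits); lanes whose mask bit is
// clear come out as +0.0. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_maskz_truncate(k: __mmask8, a: __m128d) -> __m128d {
    _mm_maskz_roundscale_pd::<_MM_FROUND_TO_ZERO>(k, a)
}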
5486
5487/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5488///
5489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5490#[inline]
5491#[target_feature(enable = "avx512f")]
5492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5493#[cfg_attr(test, assert_instr(vscalefps))]
5494pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5495    transmute(vscalefps(
5496        a.as_f32x16(),
5497        b.as_f32x16(),
5498        f32x16::ZERO,
5499        0b11111111_11111111,
5500        _MM_FROUND_CUR_DIRECTION,
5501    ))
5502}
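
// Illustrative sketch (assumption, test-only): SCALEF computes
// `a * 2^floor(b)` per lane, so it behaves like a vectorised `ldexp` when `b`
// holds the exponents. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_scale_by_eight(a: __m512) -> __m512 {
    // Every lane of `a` is multiplied by 2^3 = 8; a fractional exponent such
    // as 3.7 would be floored to 3 first.
    _mm512_scalef_ps(a, _mm512_set1_ps(3.0))
}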
5503
5504/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5505///
5506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5507#[inline]
5508#[target_feature(enable = "avx512f")]
5509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5510#[cfg_attr(test, assert_instr(vscalefps))]
5511pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5512    transmute(vscalefps(
5513        a.as_f32x16(),
5514        b.as_f32x16(),
5515        src.as_f32x16(),
5516        k,
5517        _MM_FROUND_CUR_DIRECTION,
5518    ))
5519}
5520
5521/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5522///
5523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5524#[inline]
5525#[target_feature(enable = "avx512f")]
5526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5527#[cfg_attr(test, assert_instr(vscalefps))]
5528pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
5529    transmute(vscalefps(
5530        a.as_f32x16(),
5531        b.as_f32x16(),
5532        f32x16::ZERO,
5533        k,
5534        _MM_FROUND_CUR_DIRECTION,
5535    ))
5536}
5537
5538/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5539///
5540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
5541#[inline]
5542#[target_feature(enable = "avx512f,avx512vl")]
5543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5544#[cfg_attr(test, assert_instr(vscalefps))]
5545pub unsafe fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
5546    transmute(vscalefps256(
5547        a.as_f32x8(),
5548        b.as_f32x8(),
5549        f32x8::ZERO,
5550        0b11111111,
5551    ))
5552}
5553
5554/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5555///
5556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
5557#[inline]
5558#[target_feature(enable = "avx512f,avx512vl")]
5559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5560#[cfg_attr(test, assert_instr(vscalefps))]
5561pub unsafe fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
5562    transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k))
5563}
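
// Illustrative sketch (assumption, test-only): with a writemask, only the
// lanes whose mask bit is set receive `a * 2^floor(b)`; the remaining lanes
// are copied from `src`. Here the lower four lanes are doubled and the upper
// four are passed through unchanged. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_double_low_half(a: __m256) -> __m256 {
    _mm256_mask_scalef_ps(a, 0b0000_1111, a, _mm256_set1_ps(1.0))
}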
5564
5565/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5566///
5567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
5568#[inline]
5569#[target_feature(enable = "avx512f,avx512vl")]
5570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5571#[cfg_attr(test, assert_instr(vscalefps))]
5572pub unsafe fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
5573    transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k))
5574}
5575
5576/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5577///
5578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
5579#[inline]
5580#[target_feature(enable = "avx512f,avx512vl")]
5581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5582#[cfg_attr(test, assert_instr(vscalefps))]
5583pub unsafe fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
5584    transmute(vscalefps128(
5585        a.as_f32x4(),
5586        b.as_f32x4(),
5587        f32x4::ZERO,
5588        0b00001111,
5589    ))
5590}
5591
5592/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5593///
5594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
5595#[inline]
5596#[target_feature(enable = "avx512f,avx512vl")]
5597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5598#[cfg_attr(test, assert_instr(vscalefps))]
5599pub unsafe fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
5600    transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
5601}
5602
5603/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5604///
5605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
5606#[inline]
5607#[target_feature(enable = "avx512f,avx512vl")]
5608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5609#[cfg_attr(test, assert_instr(vscalefps))]
5610pub unsafe fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5611    transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k))
5612}
5613
5614/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
5615///
5616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
5617#[inline]
5618#[target_feature(enable = "avx512f")]
5619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5620#[cfg_attr(test, assert_instr(vscalefpd))]
5621pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
5622    transmute(vscalefpd(
5623        a.as_f64x8(),
5624        b.as_f64x8(),
5625        f64x8::ZERO,
5626        0b11111111,
5627        _MM_FROUND_CUR_DIRECTION,
5628    ))
5629}
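
// Illustrative sketch (assumption, test-only): the double-precision variant
// follows the same `a * 2^floor(b)` rule, which makes it handy for applying
// per-lane integer exponents stored as doubles (a vector `ldexp`). The helper
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_ldexp_pd(a: __m512d, exponents: __m512d) -> __m512d {
    // `exponents` is expected to hold integral values such as -2.0 or 10.0;
    // non-integral values are floored before use.
    _mm512_scalef_pd(a, exponents)
}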
5630
5631/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5632///
5633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
5634#[inline]
5635#[target_feature(enable = "avx512f")]
5636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5637#[cfg_attr(test, assert_instr(vscalefpd))]
5638pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
5639    transmute(vscalefpd(
5640        a.as_f64x8(),
5641        b.as_f64x8(),
5642        src.as_f64x8(),
5643        k,
5644        _MM_FROUND_CUR_DIRECTION,
5645    ))
5646}
5647
5648/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5649///
5650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
5651#[inline]
5652#[target_feature(enable = "avx512f")]
5653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5654#[cfg_attr(test, assert_instr(vscalefpd))]
5655pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
5656    transmute(vscalefpd(
5657        a.as_f64x8(),
5658        b.as_f64x8(),
5659        f64x8::ZERO,
5660        k,
5661        _MM_FROUND_CUR_DIRECTION,
5662    ))
5663}
5664
5665/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
5666///
5667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
5668#[inline]
5669#[target_feature(enable = "avx512f,avx512vl")]
5670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5671#[cfg_attr(test, assert_instr(vscalefpd))]
5672pub unsafe fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
5673    transmute(vscalefpd256(
5674        a.as_f64x4(),
5675        b.as_f64x4(),
5676        f64x4::ZERO,
5677        0b00001111,
5678    ))
5679}
5680
5681/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5682///
5683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
5684#[inline]
5685#[target_feature(enable = "avx512f,avx512vl")]
5686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5687#[cfg_attr(test, assert_instr(vscalefpd))]
5688pub unsafe fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
5689    transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k))
5690}
5691
5692/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5693///
5694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
5695#[inline]
5696#[target_feature(enable = "avx512f,avx512vl")]
5697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5698#[cfg_attr(test, assert_instr(vscalefpd))]
5699pub unsafe fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
5700    transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k))
5701}
5702
5703/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
5704///
5705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
5706#[inline]
5707#[target_feature(enable = "avx512f,avx512vl")]
5708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5709#[cfg_attr(test, assert_instr(vscalefpd))]
5710pub unsafe fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
5711    transmute(vscalefpd128(
5712        a.as_f64x2(),
5713        b.as_f64x2(),
5714        f64x2::ZERO,
5715        0b00000011,
5716    ))
5717}
5718
5719/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5720///
5721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
5722#[inline]
5723#[target_feature(enable = "avx512f,avx512vl")]
5724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5725#[cfg_attr(test, assert_instr(vscalefpd))]
5726pub unsafe fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5727    transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
5728}
5729
5730/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5731///
5732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
5733#[inline]
5734#[target_feature(enable = "avx512f,avx512vl")]
5735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5736#[cfg_attr(test, assert_instr(vscalefpd))]
5737pub unsafe fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5738    transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k))
5739}
5740
5741/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
5742///
5743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
5744#[inline]
5745#[target_feature(enable = "avx512f")]
5746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5747#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5748#[rustc_legacy_const_generics(3)]
5749pub unsafe fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
5750    static_assert_uimm_bits!(IMM8, 8);
5751    let a = a.as_f32x16();
5752    let b = b.as_f32x16();
5753    let c = c.as_i32x16();
5754    let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
5755    transmute(r)
5756}
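
// Illustrative sketch (assumption, test-only): FIXUPIMM classifies each
// element of `b` into one of eight tokens (QNaN, SNaN, zero, +1, -inf, +inf,
// negative, positive) and uses the matching 4-bit nibble of the corresponding
// element of `c` to pick the result; a nibble of 0 keeps the element from `a`.
// With the table value below (QNaN nibble = 0b1000, i.e. "+0.0"), NaN lanes of
// `x` become +0.0 and every other lane is left untouched. Treat the constant
// as illustrative and check Intel's token-response table before relying on it;
// the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_nan_to_zero(x: __m512) -> __m512 {
    let table = _mm512_set1_epi32(0x0000_0008);
    // IMM8 = 0: no additional exception-flag reporting is requested.
    _mm512_fixupimm_ps::<0>(x, x, table)
}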
5757
5758/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5759///
5760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
5761#[inline]
5762#[target_feature(enable = "avx512f")]
5763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5764#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5765#[rustc_legacy_const_generics(4)]
5766pub unsafe fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
5767    a: __m512,
5768    k: __mmask16,
5769    b: __m512,
5770    c: __m512i,
5771) -> __m512 {
5772    static_assert_uimm_bits!(IMM8, 8);
5773    let a = a.as_f32x16();
5774    let b = b.as_f32x16();
5775    let c = c.as_i32x16();
5776    let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
5777    transmute(r)
5778}
5779
5780/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5781///
5782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
5783#[inline]
5784#[target_feature(enable = "avx512f")]
5785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5786#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5787#[rustc_legacy_const_generics(4)]
5788pub unsafe fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
5789    k: __mmask16,
5790    a: __m512,
5791    b: __m512,
5792    c: __m512i,
5793) -> __m512 {
5794    static_assert_uimm_bits!(IMM8, 8);
5795    let a = a.as_f32x16();
5796    let b = b.as_f32x16();
5797    let c = c.as_i32x16();
5798    let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
5799    transmute(r)
5800}
5801
5802/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
5803///
5804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
5805#[inline]
5806#[target_feature(enable = "avx512f,avx512vl")]
5807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5808#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5809#[rustc_legacy_const_generics(3)]
5810pub unsafe fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
5811    static_assert_uimm_bits!(IMM8, 8);
5812    let a = a.as_f32x8();
5813    let b = b.as_f32x8();
5814    let c = c.as_i32x8();
5815    let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
5816    transmute(r)
5817}
5818
5819/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5820///
5821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
5822#[inline]
5823#[target_feature(enable = "avx512f,avx512vl")]
5824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5825#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5826#[rustc_legacy_const_generics(4)]
5827pub unsafe fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
5828    a: __m256,
5829    k: __mmask8,
5830    b: __m256,
5831    c: __m256i,
5832) -> __m256 {
5833    static_assert_uimm_bits!(IMM8, 8);
5834    let a = a.as_f32x8();
5835    let b = b.as_f32x8();
5836    let c = c.as_i32x8();
5837    let r = vfixupimmps256(a, b, c, IMM8, k);
5838    transmute(r)
5839}
5840
5841/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5842///
5843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
5844#[inline]
5845#[target_feature(enable = "avx512f,avx512vl")]
5846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5847#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5848#[rustc_legacy_const_generics(4)]
5849pub unsafe fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
5850    k: __mmask8,
5851    a: __m256,
5852    b: __m256,
5853    c: __m256i,
5854) -> __m256 {
5855    static_assert_uimm_bits!(IMM8, 8);
5856    let a = a.as_f32x8();
5857    let b = b.as_f32x8();
5858    let c = c.as_i32x8();
5859    let r = vfixupimmpsz256(a, b, c, IMM8, k);
5860    transmute(r)
5861}
5862
5863/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
5864///
5865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
5866#[inline]
5867#[target_feature(enable = "avx512f,avx512vl")]
5868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5869#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5870#[rustc_legacy_const_generics(3)]
5871pub unsafe fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
5872    static_assert_uimm_bits!(IMM8, 8);
5873    let a = a.as_f32x4();
5874    let b = b.as_f32x4();
5875    let c = c.as_i32x4();
5876    let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
5877    transmute(r)
5878}
5879
5880/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5881///
5882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
5883#[inline]
5884#[target_feature(enable = "avx512f,avx512vl")]
5885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5886#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5887#[rustc_legacy_const_generics(4)]
5888pub unsafe fn _mm_mask_fixupimm_ps<const IMM8: i32>(
5889    a: __m128,
5890    k: __mmask8,
5891    b: __m128,
5892    c: __m128i,
5893) -> __m128 {
5894    static_assert_uimm_bits!(IMM8, 8);
5895    let a = a.as_f32x4();
5896    let b = b.as_f32x4();
5897    let c = c.as_i32x4();
5898    let r = vfixupimmps128(a, b, c, IMM8, k);
5899    transmute(r)
5900}
5901
5902/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5903///
5904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
5905#[inline]
5906#[target_feature(enable = "avx512f,avx512vl")]
5907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5908#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
5909#[rustc_legacy_const_generics(4)]
5910pub unsafe fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
5911    k: __mmask8,
5912    a: __m128,
5913    b: __m128,
5914    c: __m128i,
5915) -> __m128 {
5916    static_assert_uimm_bits!(IMM8, 8);
5917    let a = a.as_f32x4();
5918    let b = b.as_f32x4();
5919    let c = c.as_i32x4();
5920    let r = vfixupimmpsz128(a, b, c, IMM8, k);
5921    transmute(r)
5922}
5923
5924/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
5925///
5926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
5927#[inline]
5928#[target_feature(enable = "avx512f")]
5929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5930#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
5931#[rustc_legacy_const_generics(3)]
5932pub unsafe fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
5933    static_assert_uimm_bits!(IMM8, 8);
5934    let a = a.as_f64x8();
5935    let b = b.as_f64x8();
5936    let c = c.as_i64x8();
5937    let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5938    transmute(r)
5939}
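
// Illustrative sketch (assumption, test-only): the double-precision form works
// the same way, with one 4-bit response per token packed into each 64-bit
// element of `c` (token 0 in bits 3:0, token 1 in bits 7:4, and so on). An
// all-zero table selects "keep the element from `a`" for every token, so the
// call below simply returns `a`; real tables put non-zero nibbles at the
// tokens they want to rewrite. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fixupimm_passthrough(a: __m512d, b: __m512d) -> __m512d {
    _mm512_fixupimm_pd::<0>(a, b, _mm512_setzero_si512())
}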
5940
5941/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
5944#[inline]
5945#[target_feature(enable = "avx512f")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(4)]
5949pub unsafe fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
5950    a: __m512d,
5951    k: __mmask8,
5952    b: __m512d,
5953    c: __m512i,
5954) -> __m512d {
5955    static_assert_uimm_bits!(IMM8, 8);
5956    let a = a.as_f64x8();
5957    let b = b.as_f64x8();
5958    let c = c.as_i64x8();
5959    let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
5960    transmute(r)
5961}
5962
5963/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
5964///
5965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
5966#[inline]
5967#[target_feature(enable = "avx512f")]
5968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5969#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
5970#[rustc_legacy_const_generics(4)]
5971pub unsafe fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
5972    k: __mmask8,
5973    a: __m512d,
5974    b: __m512d,
5975    c: __m512i,
5976) -> __m512d {
5977    static_assert_uimm_bits!(IMM8, 8);
5978    let a = a.as_f64x8();
5979    let b = b.as_f64x8();
5980    let c = c.as_i64x8();
5981    let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
5982    transmute(r)
5983}
5984
5985/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
5986///
5987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
5988#[inline]
5989#[target_feature(enable = "avx512f,avx512vl")]
5990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5991#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
5992#[rustc_legacy_const_generics(3)]
5993pub unsafe fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
5994    static_assert_uimm_bits!(IMM8, 8);
5995    let a = a.as_f64x4();
5996    let b = b.as_f64x4();
5997    let c = c.as_i64x4();
5998    let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
5999    transmute(r)
6000}
6001
6002/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6003///
6004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6005#[inline]
6006#[target_feature(enable = "avx512f,avx512vl")]
6007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6008#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6009#[rustc_legacy_const_generics(4)]
6010pub unsafe fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6011    a: __m256d,
6012    k: __mmask8,
6013    b: __m256d,
6014    c: __m256i,
6015) -> __m256d {
6016    static_assert_uimm_bits!(IMM8, 8);
6017    let a = a.as_f64x4();
6018    let b = b.as_f64x4();
6019    let c = c.as_i64x4();
6020    let r = vfixupimmpd256(a, b, c, IMM8, k);
6021    transmute(r)
6022}
6023
6024/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6025///
6026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6027#[inline]
6028#[target_feature(enable = "avx512f,avx512vl")]
6029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6030#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6031#[rustc_legacy_const_generics(4)]
6032pub unsafe fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6033    k: __mmask8,
6034    a: __m256d,
6035    b: __m256d,
6036    c: __m256i,
6037) -> __m256d {
6038    static_assert_uimm_bits!(IMM8, 8);
6039    let a = a.as_f64x4();
6040    let b = b.as_f64x4();
6041    let c = c.as_i64x4();
6042    let r = vfixupimmpdz256(a, b, c, IMM8, k);
6043    transmute(r)
6044}
6045
6046/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6047///
6048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6049#[inline]
6050#[target_feature(enable = "avx512f,avx512vl")]
6051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6052#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6053#[rustc_legacy_const_generics(3)]
6054pub unsafe fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6055    static_assert_uimm_bits!(IMM8, 8);
6056    let a = a.as_f64x2();
6057    let b = b.as_f64x2();
6058    let c = c.as_i64x2();
6059    let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6060    transmute(r)
6061}
6062
6063/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6064///
6065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6066#[inline]
6067#[target_feature(enable = "avx512f,avx512vl")]
6068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6069#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6070#[rustc_legacy_const_generics(4)]
6071pub unsafe fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6072    a: __m128d,
6073    k: __mmask8,
6074    b: __m128d,
6075    c: __m128i,
6076) -> __m128d {
6077    static_assert_uimm_bits!(IMM8, 8);
6078    let a = a.as_f64x2();
6079    let b = b.as_f64x2();
6080    let c = c.as_i64x2();
6081    let r = vfixupimmpd128(a, b, c, IMM8, k);
6082    transmute(r)
6083}
6084
6085/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6086///
6087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6088#[inline]
6089#[target_feature(enable = "avx512f,avx512vl")]
6090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6091#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6092#[rustc_legacy_const_generics(4)]
6093pub unsafe fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6094    k: __mmask8,
6095    a: __m128d,
6096    b: __m128d,
6097    c: __m128i,
6098) -> __m128d {
6099    static_assert_uimm_bits!(IMM8, 8);
6100    let a = a.as_f64x2();
6101    let b = b.as_f64x2();
6102    let c = c.as_i64x2();
6103    let r = vfixupimmpdz128(a, b, c, IMM8, k);
6104    transmute(r)
6105}
6106
6107/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6108///
6109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6110#[inline]
6111#[target_feature(enable = "avx512f")]
6112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6113#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6114#[rustc_legacy_const_generics(3)]
6115pub unsafe fn _mm512_ternarylogic_epi32<const IMM8: i32>(
6116    a: __m512i,
6117    b: __m512i,
6118    c: __m512i,
6119) -> __m512i {
6120    static_assert_uimm_bits!(IMM8, 8);
6121    let a = a.as_i32x16();
6122    let b = b.as_i32x16();
6123    let c = c.as_i32x16();
6124    let r = vpternlogd(a, b, c, IMM8);
6125    transmute(r)
6126}
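
// Illustrative sketch (assumption, test-only): IMM8 is the 8-entry truth table
// of the three-input function, indexed by the bits (a << 2) | (b << 1) | c.
// The constant 0x96 is the classic three-way XOR table, so the call below
// computes `a ^ b ^ c` in a single instruction. The helper name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_xor3(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    _mm512_ternarylogic_epi32::<0x96>(a, b, c)
}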
6127
6128/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6129///
6130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6131#[inline]
6132#[target_feature(enable = "avx512f")]
6133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6134#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6135#[rustc_legacy_const_generics(4)]
6136pub unsafe fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6137    src: __m512i,
6138    k: __mmask16,
6139    a: __m512i,
6140    b: __m512i,
6141) -> __m512i {
6142    static_assert_uimm_bits!(IMM8, 8);
6143    let src = src.as_i32x16();
6144    let a = a.as_i32x16();
6145    let b = b.as_i32x16();
6146    let r = vpternlogd(src, a, b, IMM8);
6147    transmute(simd_select_bitmask(k, r, src))
6148}
6149
6150/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6151///
6152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6153#[inline]
6154#[target_feature(enable = "avx512f")]
6155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6156#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6157#[rustc_legacy_const_generics(4)]
6158pub unsafe fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6159    k: __mmask16,
6160    a: __m512i,
6161    b: __m512i,
6162    c: __m512i,
6163) -> __m512i {
6164    static_assert_uimm_bits!(IMM8, 8);
6165    let a = a.as_i32x16();
6166    let b = b.as_i32x16();
6167    let c = c.as_i32x16();
6168    let r = vpternlogd(a, b, c, IMM8);
6169    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6170}
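
// Illustrative sketch (assumption, test-only): combining a truth table with a
// zeromask. 0x80 has only bit 7 set, i.e. the output bit is 1 only when the
// bits from a, b, and c are all 1, which is a three-way AND; lanes whose mask
// bit is clear are zeroed. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_and3(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    _mm512_maskz_ternarylogic_epi32::<0x80>(k, a, b, c)
}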
6171
6172/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6173///
6174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6175#[inline]
6176#[target_feature(enable = "avx512f,avx512vl")]
6177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6178#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6179#[rustc_legacy_const_generics(3)]
6180pub unsafe fn _mm256_ternarylogic_epi32<const IMM8: i32>(
6181    a: __m256i,
6182    b: __m256i,
6183    c: __m256i,
6184) -> __m256i {
6185    static_assert_uimm_bits!(IMM8, 8);
6186    let a = a.as_i32x8();
6187    let b = b.as_i32x8();
6188    let c = c.as_i32x8();
6189    let r = vpternlogd256(a, b, c, IMM8);
6190    transmute(r)
6191}
6192
6193/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6194///
6195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6196#[inline]
6197#[target_feature(enable = "avx512f,avx512vl")]
6198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6199#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6200#[rustc_legacy_const_generics(4)]
6201pub unsafe fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6202    src: __m256i,
6203    k: __mmask8,
6204    a: __m256i,
6205    b: __m256i,
6206) -> __m256i {
6207    static_assert_uimm_bits!(IMM8, 8);
6208    let src = src.as_i32x8();
6209    let a = a.as_i32x8();
6210    let b = b.as_i32x8();
6211    let r = vpternlogd256(src, a, b, IMM8);
6212    transmute(simd_select_bitmask(k, r, src))
6213}
6214
6215/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6216///
6217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6218#[inline]
6219#[target_feature(enable = "avx512f,avx512vl")]
6220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6221#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6222#[rustc_legacy_const_generics(4)]
6223pub unsafe fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6224    k: __mmask8,
6225    a: __m256i,
6226    b: __m256i,
6227    c: __m256i,
6228) -> __m256i {
6229    static_assert_uimm_bits!(IMM8, 8);
6230    let a = a.as_i32x8();
6231    let b = b.as_i32x8();
6232    let c = c.as_i32x8();
6233    let r = vpternlogd256(a, b, c, IMM8);
6234    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6235}
6236
6237/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6238///
6239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6240#[inline]
6241#[target_feature(enable = "avx512f,avx512vl")]
6242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6243#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6244#[rustc_legacy_const_generics(3)]
6245pub unsafe fn _mm_ternarylogic_epi32<const IMM8: i32>(
6246    a: __m128i,
6247    b: __m128i,
6248    c: __m128i,
6249) -> __m128i {
6250    static_assert_uimm_bits!(IMM8, 8);
6251    let a = a.as_i32x4();
6252    let b = b.as_i32x4();
6253    let c = c.as_i32x4();
6254    let r = vpternlogd128(a, b, c, IMM8);
6255    transmute(r)
6256}
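
// Illustrative sketch (assumption, test-only): 0xCA is the "bitwise select"
// truth table: wherever a bit of `mask` is 1 the result takes the bit from
// `b`, otherwise it takes the bit from `c` (the index order is a, b, c from
// most to least significant). The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_bitwise_select(mask: __m128i, b: __m128i, c: __m128i) -> __m128i {
    _mm_ternarylogic_epi32::<0xCA>(mask, b, c)
}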
6257
6258/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6259///
6260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6261#[inline]
6262#[target_feature(enable = "avx512f,avx512vl")]
6263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6264#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6265#[rustc_legacy_const_generics(4)]
6266pub unsafe fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6267    src: __m128i,
6268    k: __mmask8,
6269    a: __m128i,
6270    b: __m128i,
6271) -> __m128i {
6272    static_assert_uimm_bits!(IMM8, 8);
6273    let src = src.as_i32x4();
6274    let a = a.as_i32x4();
6275    let b = b.as_i32x4();
6276    let r = vpternlogd128(src, a, b, IMM8);
6277    transmute(simd_select_bitmask(k, r, src))
6278}
6279
6280/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6281///
6282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6283#[inline]
6284#[target_feature(enable = "avx512f,avx512vl")]
6285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6286#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6287#[rustc_legacy_const_generics(4)]
6288pub unsafe fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6289    k: __mmask8,
6290    a: __m128i,
6291    b: __m128i,
6292    c: __m128i,
6293) -> __m128i {
6294    static_assert_uimm_bits!(IMM8, 8);
6295    let a = a.as_i32x4();
6296    let b = b.as_i32x4();
6297    let c = c.as_i32x4();
6298    let r = vpternlogd128(a, b, c, IMM8);
6299    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6300}
6301
6302/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6303///
6304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6305#[inline]
6306#[target_feature(enable = "avx512f")]
6307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6308#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6309#[rustc_legacy_const_generics(3)]
6310pub unsafe fn _mm512_ternarylogic_epi64<const IMM8: i32>(
6311    a: __m512i,
6312    b: __m512i,
6313    c: __m512i,
6314) -> __m512i {
6315    static_assert_uimm_bits!(IMM8, 8);
6316    let a = a.as_i64x8();
6317    let b = b.as_i64x8();
6318    let c = c.as_i64x8();
6319    let r = vpternlogq(a, b, c, IMM8);
6320    transmute(r)
6321}
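
// Illustrative sketch (assumption, test-only): the 64-bit-granularity form
// differs only in how masking would be applied; the truth table still works on
// individual bits. 0xE8 sets exactly the table entries whose index has two or
// more 1 bits, i.e. the bitwise majority of a, b, and c. The helper name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_majority(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    _mm512_ternarylogic_epi64::<0xE8>(a, b, c)
}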
6322
6323/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6324///
6325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6326#[inline]
6327#[target_feature(enable = "avx512f")]
6328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6329#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6330#[rustc_legacy_const_generics(4)]
6331pub unsafe fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6332    src: __m512i,
6333    k: __mmask8,
6334    a: __m512i,
6335    b: __m512i,
6336) -> __m512i {
6337    static_assert_uimm_bits!(IMM8, 8);
6338    let src = src.as_i64x8();
6339    let a = a.as_i64x8();
6340    let b = b.as_i64x8();
6341    let r = vpternlogq(src, a, b, IMM8);
6342    transmute(simd_select_bitmask(k, r, src))
6343}
6344
6345/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6346///
6347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6348#[inline]
6349#[target_feature(enable = "avx512f")]
6350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6351#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6352#[rustc_legacy_const_generics(4)]
6353pub unsafe fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6354    k: __mmask8,
6355    a: __m512i,
6356    b: __m512i,
6357    c: __m512i,
6358) -> __m512i {
6359    static_assert_uimm_bits!(IMM8, 8);
6360    let a = a.as_i64x8();
6361    let b = b.as_i64x8();
6362    let c = c.as_i64x8();
6363    let r = vpternlogq(a, b, c, IMM8);
6364    transmute(simd_select_bitmask(k, r, i64x8::ZERO))
6365}
6366
6367/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6368///
6369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6370#[inline]
6371#[target_feature(enable = "avx512f,avx512vl")]
6372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6373#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6374#[rustc_legacy_const_generics(3)]
6375pub unsafe fn _mm256_ternarylogic_epi64<const IMM8: i32>(
6376    a: __m256i,
6377    b: __m256i,
6378    c: __m256i,
6379) -> __m256i {
6380    static_assert_uimm_bits!(IMM8, 8);
6381    let a = a.as_i64x4();
6382    let b = b.as_i64x4();
6383    let c = c.as_i64x4();
6384    let r = vpternlogq256(a, b, c, IMM8);
6385    transmute(r)
6386}
6387
6388/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6389///
6390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6391#[inline]
6392#[target_feature(enable = "avx512f,avx512vl")]
6393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6394#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6395#[rustc_legacy_const_generics(4)]
6396pub unsafe fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6397    src: __m256i,
6398    k: __mmask8,
6399    a: __m256i,
6400    b: __m256i,
6401) -> __m256i {
6402    static_assert_uimm_bits!(IMM8, 8);
6403    let src = src.as_i64x4();
6404    let a = a.as_i64x4();
6405    let b = b.as_i64x4();
6406    let r = vpternlogq256(src, a, b, IMM8);
6407    transmute(simd_select_bitmask(k, r, src))
6408}
6409
6410/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6411///
6412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6413#[inline]
6414#[target_feature(enable = "avx512f,avx512vl")]
6415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6416#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6417#[rustc_legacy_const_generics(4)]
6418pub unsafe fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6419    k: __mmask8,
6420    a: __m256i,
6421    b: __m256i,
6422    c: __m256i,
6423) -> __m256i {
6424    static_assert_uimm_bits!(IMM8, 8);
6425    let a = a.as_i64x4();
6426    let b = b.as_i64x4();
6427    let c = c.as_i64x4();
6428    let r = vpternlogq256(a, b, c, IMM8);
6429    transmute(simd_select_bitmask(k, r, i64x4::ZERO))
6430}
6431
6432/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst.
6433///
6434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6435#[inline]
6436#[target_feature(enable = "avx512f,avx512vl")]
6437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6438#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6439#[rustc_legacy_const_generics(3)]
6440pub unsafe fn _mm_ternarylogic_epi64<const IMM8: i32>(
6441    a: __m128i,
6442    b: __m128i,
6443    c: __m128i,
6444) -> __m128i {
6445    static_assert_uimm_bits!(IMM8, 8);
6446    let a = a.as_i64x2();
6447    let b = b.as_i64x2();
6448    let c = c.as_i64x2();
6449    let r = vpternlogq128(a, b, c, IMM8);
6450    transmute(r)
6451}
6452
6453/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6454///
6455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6456#[inline]
6457#[target_feature(enable = "avx512f,avx512vl")]
6458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6459#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6460#[rustc_legacy_const_generics(4)]
6461pub unsafe fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6462    src: __m128i,
6463    k: __mmask8,
6464    a: __m128i,
6465    b: __m128i,
6466) -> __m128i {
6467    static_assert_uimm_bits!(IMM8, 8);
6468    let src = src.as_i64x2();
6469    let a = a.as_i64x2();
6470    let b = b.as_i64x2();
6471    let r = vpternlogq128(src, a, b, IMM8);
6472    transmute(simd_select_bitmask(k, r, src))
6473}
6474
6475/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by the value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the bit of imm8 at that index is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6476///
6477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
6478#[inline]
6479#[target_feature(enable = "avx512f,avx512vl")]
6480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6481#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6482#[rustc_legacy_const_generics(4)]
6483pub unsafe fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
6484    k: __mmask8,
6485    a: __m128i,
6486    b: __m128i,
6487    c: __m128i,
6488) -> __m128i {
6489    static_assert_uimm_bits!(IMM8, 8);
6490    let a = a.as_i64x2();
6491    let b = b.as_i64x2();
6492    let c = c.as_i64x2();
6493    let r = vpternlogq128(a, b, c, IMM8);
6494    transmute(simd_select_bitmask(k, r, i64x2::ZERO))
6495}
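
// Illustrative sketch (the helper name below is ours, not part of this module's
// API): picking the `IMM8` truth table for the ternary-logic intrinsics above.
// As the doc comments describe, bit `(a << 2) | (b << 1) | c` of the immediate
// holds the result for that combination of input bits, so `a ^ b ^ c` is
// 0b1001_0110 = 0x96 (bits 1, 2, 4 and 7 set).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn ternarylogic_xor3_example(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    // Computes `a ^ b ^ c` on every bit of the 128-bit vectors.
    _mm_ternarylogic_epi64::<0x96>(a, b, c)
}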
6496
6497/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6498/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6499///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6500///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6501///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6502///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6503/// The sign is determined by sc which can take the following values:\
6504///    _MM_MANT_SIGN_src     // sign = sign(src)\
6505///    _MM_MANT_SIGN_zero    // sign = 0\
6506///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6507///
6508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
6509#[inline]
6510#[target_feature(enable = "avx512f")]
6511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6512#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6513#[rustc_legacy_const_generics(1, 2)]
6514pub unsafe fn _mm512_getmant_ps<
6515    const NORM: _MM_MANTISSA_NORM_ENUM,
6516    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6517>(
6518    a: __m512,
6519) -> __m512 {
6520    static_assert_uimm_bits!(NORM, 4);
6521    static_assert_uimm_bits!(SIGN, 2);
6522    let a = a.as_f32x16();
6523    let zero = f32x16::ZERO;
6524    let r = vgetmantps(
6525        a,
6526        SIGN << 2 | NORM,
6527        zero,
6528        0b11111111_11111111,
6529        _MM_FROUND_CUR_DIRECTION,
6530    );
6531    transmute(r)
6532}
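
// Illustrative sketch (the helper name is ours): normalizing mantissas into
// [1, 2) while keeping the sign of the source. The wrapper packs the two
// selectors into the hardware immediate as `SIGN << 2 | NORM`, so this call
// passes imm8 = 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getmant_norm_1_2_example(a: __m512) -> __m512 {
    // NORM = 0 selects the [1, 2) interval (`_MM_MANT_NORM_1_2`);
    // SIGN = 0 keeps the sign of the source (`_MM_MANT_SIGN_src`).
    _mm512_getmant_ps::<0, 0>(a)
}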
6533
6534/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6535/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6536///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6537///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6538///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6539///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6540/// The sign is determined by sc which can take the following values:\
6541///    _MM_MANT_SIGN_src     // sign = sign(src)\
6542///    _MM_MANT_SIGN_zero    // sign = 0\
6543///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
6546#[inline]
6547#[target_feature(enable = "avx512f")]
6548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6549#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6550#[rustc_legacy_const_generics(3, 4)]
6551pub unsafe fn _mm512_mask_getmant_ps<
6552    const NORM: _MM_MANTISSA_NORM_ENUM,
6553    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6554>(
6555    src: __m512,
6556    k: __mmask16,
6557    a: __m512,
6558) -> __m512 {
6559    static_assert_uimm_bits!(NORM, 4);
6560    static_assert_uimm_bits!(SIGN, 2);
6561    let a = a.as_f32x16();
6562    let src = src.as_f32x16();
6563    let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
6564    transmute(r)
6565}
6566
6567/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6568/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6569///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6570///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6571///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6572///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6573/// The sign is determined by sc which can take the following values:\
6574///    _MM_MANT_SIGN_src     // sign = sign(src)\
6575///    _MM_MANT_SIGN_zero    // sign = 0\
6576///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6577///
6578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
6579#[inline]
6580#[target_feature(enable = "avx512f")]
6581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6582#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6583#[rustc_legacy_const_generics(2, 3)]
6584pub unsafe fn _mm512_maskz_getmant_ps<
6585    const NORM: _MM_MANTISSA_NORM_ENUM,
6586    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6587>(
6588    k: __mmask16,
6589    a: __m512,
6590) -> __m512 {
6591    static_assert_uimm_bits!(NORM, 4);
6592    static_assert_uimm_bits!(SIGN, 2);
6593    let a = a.as_f32x16();
6594    let r = vgetmantps(
6595        a,
6596        SIGN << 2 | NORM,
6597        f32x16::ZERO,
6598        k,
6599        _MM_FROUND_CUR_DIRECTION,
6600    );
6601    transmute(r)
6602}
6603
6604/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6605/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6606///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6607///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6608///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6609///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6610/// The sign is determined by sc which can take the following values:\
6611///    _MM_MANT_SIGN_src     // sign = sign(src)\
6612///    _MM_MANT_SIGN_zero    // sign = 0\
6613///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6614///
6615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
6616#[inline]
6617#[target_feature(enable = "avx512f,avx512vl")]
6618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6619#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6620#[rustc_legacy_const_generics(1, 2)]
6621pub unsafe fn _mm256_getmant_ps<
6622    const NORM: _MM_MANTISSA_NORM_ENUM,
6623    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6624>(
6625    a: __m256,
6626) -> __m256 {
6627    static_assert_uimm_bits!(NORM, 4);
6628    static_assert_uimm_bits!(SIGN, 2);
6629    let a = a.as_f32x8();
6630    let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
6631    transmute(r)
6632}
6633
6634/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6635/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6636///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6637///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6638///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6639///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6640/// The sign is determined by sc which can take the following values:\
6641///    _MM_MANT_SIGN_src     // sign = sign(src)\
6642///    _MM_MANT_SIGN_zero    // sign = 0\
6643///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6644///
6645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
6646#[inline]
6647#[target_feature(enable = "avx512f,avx512vl")]
6648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6649#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6650#[rustc_legacy_const_generics(3, 4)]
6651pub unsafe fn _mm256_mask_getmant_ps<
6652    const NORM: _MM_MANTISSA_NORM_ENUM,
6653    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6654>(
6655    src: __m256,
6656    k: __mmask8,
6657    a: __m256,
6658) -> __m256 {
6659    static_assert_uimm_bits!(NORM, 4);
6660    static_assert_uimm_bits!(SIGN, 2);
6661    let a = a.as_f32x8();
6662    let src = src.as_f32x8();
6663    let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
6664    transmute(r)
6665}
6666
6667/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6668/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6669///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6670///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6671///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6672///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6673/// The sign is determined by sc which can take the following values:\
6674///    _MM_MANT_SIGN_src     // sign = sign(src)\
6675///    _MM_MANT_SIGN_zero    // sign = 0\
6676///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6677///
6678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
6679#[inline]
6680#[target_feature(enable = "avx512f,avx512vl")]
6681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6682#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6683#[rustc_legacy_const_generics(2, 3)]
6684pub unsafe fn _mm256_maskz_getmant_ps<
6685    const NORM: _MM_MANTISSA_NORM_ENUM,
6686    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6687>(
6688    k: __mmask8,
6689    a: __m256,
6690) -> __m256 {
6691    static_assert_uimm_bits!(NORM, 4);
6692    static_assert_uimm_bits!(SIGN, 2);
6693    let a = a.as_f32x8();
6694    let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
6695    transmute(r)
6696}
6697
6698/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6699/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6700///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6701///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6702///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6703///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6704/// The sign is determined by sc which can take the following values:\
6705///    _MM_MANT_SIGN_src     // sign = sign(src)\
6706///    _MM_MANT_SIGN_zero    // sign = 0\
6707///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6708///
6709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
6710#[inline]
6711#[target_feature(enable = "avx512f,avx512vl")]
6712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6713#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6714#[rustc_legacy_const_generics(1, 2)]
6715pub unsafe fn _mm_getmant_ps<
6716    const NORM: _MM_MANTISSA_NORM_ENUM,
6717    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6718>(
6719    a: __m128,
6720) -> __m128 {
6721    static_assert_uimm_bits!(NORM, 4);
6722    static_assert_uimm_bits!(SIGN, 2);
6723    let a = a.as_f32x4();
6724    let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
6725    transmute(r)
6726}
6727
6728/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6729/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6730///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6731///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6732///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6733///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6734/// The sign is determined by sc which can take the following values:\
6735///    _MM_MANT_SIGN_src     // sign = sign(src)\
6736///    _MM_MANT_SIGN_zero    // sign = 0\
6737///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6738///
6739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
6740#[inline]
6741#[target_feature(enable = "avx512f,avx512vl")]
6742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6743#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6744#[rustc_legacy_const_generics(3, 4)]
6745pub unsafe fn _mm_mask_getmant_ps<
6746    const NORM: _MM_MANTISSA_NORM_ENUM,
6747    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6748>(
6749    src: __m128,
6750    k: __mmask8,
6751    a: __m128,
6752) -> __m128 {
6753    static_assert_uimm_bits!(NORM, 4);
6754    static_assert_uimm_bits!(SIGN, 2);
6755    let a = a.as_f32x4();
6756    let src = src.as_f32x4();
6757    let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
6758    transmute(r)
6759}
6760
6761/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6762/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6763///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6764///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6765///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6766///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6767/// The sign is determined by sc which can take the following values:\
6768///    _MM_MANT_SIGN_src     // sign = sign(src)\
6769///    _MM_MANT_SIGN_zero    // sign = 0\
6770///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6771///
6772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
6773#[inline]
6774#[target_feature(enable = "avx512f,avx512vl")]
6775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6776#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
6777#[rustc_legacy_const_generics(2, 3)]
6778pub unsafe fn _mm_maskz_getmant_ps<
6779    const NORM: _MM_MANTISSA_NORM_ENUM,
6780    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6781>(
6782    k: __mmask8,
6783    a: __m128,
6784) -> __m128 {
6785    static_assert_uimm_bits!(NORM, 4);
6786    static_assert_uimm_bits!(SIGN, 2);
6787    let a = a.as_f32x4();
6788    let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
6789    transmute(r)
6790}
6791
6792/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6793/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6794///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6795///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6796///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6797///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6798/// The sign is determined by sc which can take the following values:\
6799///    _MM_MANT_SIGN_src     // sign = sign(src)\
6800///    _MM_MANT_SIGN_zero    // sign = 0\
6801///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6802///
6803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
6804#[inline]
6805#[target_feature(enable = "avx512f")]
6806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6807#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
6808#[rustc_legacy_const_generics(1, 2)]
6809pub unsafe fn _mm512_getmant_pd<
6810    const NORM: _MM_MANTISSA_NORM_ENUM,
6811    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6812>(
6813    a: __m512d,
6814) -> __m512d {
6815    static_assert_uimm_bits!(NORM, 4);
6816    static_assert_uimm_bits!(SIGN, 2);
6817    let a = a.as_f64x8();
6818    let zero = f64x8::ZERO;
6819    let r = vgetmantpd(
6820        a,
6821        SIGN << 2 | NORM,
6822        zero,
6823        0b11111111,
6824        _MM_FROUND_CUR_DIRECTION,
6825    );
6826    transmute(r)
6827}
6828
6829/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6830/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6831///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6832///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6833///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6834///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6835/// The sign is determined by sc which can take the following values:\
6836///    _MM_MANT_SIGN_src     // sign = sign(src)\
6837///    _MM_MANT_SIGN_zero    // sign = 0\
6838///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6839///
6840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
6841#[inline]
6842#[target_feature(enable = "avx512f")]
6843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6844#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
6845#[rustc_legacy_const_generics(3, 4)]
6846pub unsafe fn _mm512_mask_getmant_pd<
6847    const NORM: _MM_MANTISSA_NORM_ENUM,
6848    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6849>(
6850    src: __m512d,
6851    k: __mmask8,
6852    a: __m512d,
6853) -> __m512d {
6854    static_assert_uimm_bits!(NORM, 4);
6855    static_assert_uimm_bits!(SIGN, 2);
6856    let a = a.as_f64x8();
6857    let src = src.as_f64x8();
6858    let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
6859    transmute(r)
6860}
6861
6862/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6863/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6864///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6865///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6866///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6867///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6868/// The sign is determined by sc which can take the following values:\
6869///    _MM_MANT_SIGN_src     // sign = sign(src)\
6870///    _MM_MANT_SIGN_zero    // sign = 0\
6871///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6872///
6873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
6874#[inline]
6875#[target_feature(enable = "avx512f")]
6876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6877#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
6878#[rustc_legacy_const_generics(2, 3)]
6879pub unsafe fn _mm512_maskz_getmant_pd<
6880    const NORM: _MM_MANTISSA_NORM_ENUM,
6881    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6882>(
6883    k: __mmask8,
6884    a: __m512d,
6885) -> __m512d {
6886    static_assert_uimm_bits!(NORM, 4);
6887    static_assert_uimm_bits!(SIGN, 2);
6888    let a = a.as_f64x8();
6889    let r = vgetmantpd(
6890        a,
6891        SIGN << 2 | NORM,
6892        f64x8::ZERO,
6893        k,
6894        _MM_FROUND_CUR_DIRECTION,
6895    );
6896    transmute(r)
6897}
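
// Illustrative sketch (the helper name is ours): the zero-masking variant.
// Lanes whose bit in `k` is clear are forced to 0.0 rather than copied from a
// source vector; the surviving lanes have their mantissas normalized to
// [0.5, 1) with the sign forced to positive.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_getmant_pd_example(k: __mmask8, a: __m512d) -> __m512d {
    // NORM = 2 selects [0.5, 1) (`_MM_MANT_NORM_p5_1`);
    // SIGN = 1 forces the sign to 0, i.e. positive (`_MM_MANT_SIGN_zero`).
    _mm512_maskz_getmant_pd::<2, 1>(k, a)
}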
6898
6899/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6900/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6901///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6902///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6903///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6904///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6905/// The sign is determined by sc which can take the following values:\
6906///    _MM_MANT_SIGN_src     // sign = sign(src)\
6907///    _MM_MANT_SIGN_zero    // sign = 0\
6908///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6909///
6910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
6911#[inline]
6912#[target_feature(enable = "avx512f,avx512vl")]
6913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6914#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
6915#[rustc_legacy_const_generics(1, 2)]
6916pub unsafe fn _mm256_getmant_pd<
6917    const NORM: _MM_MANTISSA_NORM_ENUM,
6918    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6919>(
6920    a: __m256d,
6921) -> __m256d {
6922    static_assert_uimm_bits!(NORM, 4);
6923    static_assert_uimm_bits!(SIGN, 2);
6924    let a = a.as_f64x4();
6925    let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
6926    transmute(r)
6927}
6928
6929/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6930/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6931///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6932///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6933///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6934///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6935/// The sign is determined by sc which can take the following values:\
6936///    _MM_MANT_SIGN_src     // sign = sign(src)\
6937///    _MM_MANT_SIGN_zero    // sign = 0\
6938///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6939///
6940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
6941#[inline]
6942#[target_feature(enable = "avx512f,avx512vl")]
6943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6944#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
6945#[rustc_legacy_const_generics(3, 4)]
6946pub unsafe fn _mm256_mask_getmant_pd<
6947    const NORM: _MM_MANTISSA_NORM_ENUM,
6948    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6949>(
6950    src: __m256d,
6951    k: __mmask8,
6952    a: __m256d,
6953) -> __m256d {
6954    static_assert_uimm_bits!(NORM, 4);
6955    static_assert_uimm_bits!(SIGN, 2);
6956    let a = a.as_f64x4();
6957    let src = src.as_f64x4();
6958    let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
6959    transmute(r)
6960}
6961
6962/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6963/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6964///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6965///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6966///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6967///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6968/// The sign is determined by sc which can take the following values:\
6969///    _MM_MANT_SIGN_src     // sign = sign(src)\
6970///    _MM_MANT_SIGN_zero    // sign = 0\
6971///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
6972///
6973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
6974#[inline]
6975#[target_feature(enable = "avx512f,avx512vl")]
6976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6977#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
6978#[rustc_legacy_const_generics(2, 3)]
6979pub unsafe fn _mm256_maskz_getmant_pd<
6980    const NORM: _MM_MANTISSA_NORM_ENUM,
6981    const SIGN: _MM_MANTISSA_SIGN_ENUM,
6982>(
6983    k: __mmask8,
6984    a: __m256d,
6985) -> __m256d {
6986    static_assert_uimm_bits!(NORM, 4);
6987    static_assert_uimm_bits!(SIGN, 2);
6988    let a = a.as_f64x4();
6989    let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
6990    transmute(r)
6991}
6992
6993/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6994/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6995///    _MM_MANT_NORM_1_2     // interval [1, 2)\
6996///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
6997///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
6998///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6999/// The sign is determined by sc which can take the following values:\
7000///    _MM_MANT_SIGN_src     // sign = sign(src)\
7001///    _MM_MANT_SIGN_zero    // sign = 0\
7002///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7003///
7004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7005#[inline]
7006#[target_feature(enable = "avx512f,avx512vl")]
7007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7008#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7009#[rustc_legacy_const_generics(1, 2)]
7010pub unsafe fn _mm_getmant_pd<
7011    const NORM: _MM_MANTISSA_NORM_ENUM,
7012    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7013>(
7014    a: __m128d,
7015) -> __m128d {
7016    static_assert_uimm_bits!(NORM, 4);
7017    static_assert_uimm_bits!(SIGN, 2);
7018    let a = a.as_f64x2();
7019    let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7020    transmute(r)
7021}
7022
7023/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7024/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7025///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7026///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7027///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7028///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7029/// The sign is determined by sc which can take the following values:\
7030///    _MM_MANT_SIGN_src     // sign = sign(src)\
7031///    _MM_MANT_SIGN_zero    // sign = 0\
7032///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7033///
7034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7035#[inline]
7036#[target_feature(enable = "avx512f,avx512vl")]
7037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7038#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7039#[rustc_legacy_const_generics(3, 4)]
7040pub unsafe fn _mm_mask_getmant_pd<
7041    const NORM: _MM_MANTISSA_NORM_ENUM,
7042    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7043>(
7044    src: __m128d,
7045    k: __mmask8,
7046    a: __m128d,
7047) -> __m128d {
7048    static_assert_uimm_bits!(NORM, 4);
7049    static_assert_uimm_bits!(SIGN, 2);
7050    let a = a.as_f64x2();
7051    let src = src.as_f64x2();
7052    let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7053    transmute(r)
7054}
7055
7056/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7057/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7058///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7059///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7060///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7061///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7062/// The sign is determined by sc which can take the following values:\
7063///    _MM_MANT_SIGN_src     // sign = sign(src)\
7064///    _MM_MANT_SIGN_zero    // sign = 0\
7065///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7066///
7067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7068#[inline]
7069#[target_feature(enable = "avx512f,avx512vl")]
7070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7071#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7072#[rustc_legacy_const_generics(2, 3)]
7073pub unsafe fn _mm_maskz_getmant_pd<
7074    const NORM: _MM_MANTISSA_NORM_ENUM,
7075    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7076>(
7077    k: __mmask8,
7078    a: __m128d,
7079) -> __m128d {
7080    static_assert_uimm_bits!(NORM, 4);
7081    static_assert_uimm_bits!(SIGN, 2);
7082    let a = a.as_f64x2();
7083    let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7084    transmute(r)
7085}
7086
7087/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7088///
7089/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7090/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7091/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7092/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7093/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7094/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7095///
7096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7097#[inline]
7098#[target_feature(enable = "avx512f")]
7099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7100#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7101#[rustc_legacy_const_generics(2)]
7102pub unsafe fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7103    static_assert_rounding!(ROUNDING);
7104    let a = a.as_f32x16();
7105    let b = b.as_f32x16();
7106    let r = vaddps(a, b, ROUNDING);
7107    transmute(r)
7108}
7109
7110/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7111///
7112/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7113/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7114/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7115/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7116/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7117/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7118///
7119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7120#[inline]
7121#[target_feature(enable = "avx512f")]
7122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7123#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7124#[rustc_legacy_const_generics(4)]
7125pub unsafe fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7126    src: __m512,
7127    k: __mmask16,
7128    a: __m512,
7129    b: __m512,
7130) -> __m512 {
7131    static_assert_rounding!(ROUNDING);
7132    let a = a.as_f32x16();
7133    let b = b.as_f32x16();
7134    let r = vaddps(a, b, ROUNDING);
7135    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7136}
7137
7138/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7139///
7140/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7141/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7142/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7143/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7144/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7145/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7146///
7147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7148#[inline]
7149#[target_feature(enable = "avx512f")]
7150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7151#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7152#[rustc_legacy_const_generics(3)]
7153pub unsafe fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7154    k: __mmask16,
7155    a: __m512,
7156    b: __m512,
7157) -> __m512 {
7158    static_assert_rounding!(ROUNDING);
7159    let a = a.as_f32x16();
7160    let b = b.as_f32x16();
7161    let r = vaddps(a, b, ROUNDING);
7162    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7163}
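
// Illustrative sketch (the helper name is ours): adding with an explicit,
// per-instruction rounding mode instead of the mode in `MXCSR.RC`.
// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` evaluates to 8, the same
// value the `assert_instr(..., ROUNDING = 8)` annotations above use.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn add_round_nearest_example(a: __m512, b: __m512) -> __m512 {
    _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
}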
7164
7165/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7166///
7167/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7168/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7169/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7170/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7171/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7172/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7173///
7174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7175#[inline]
7176#[target_feature(enable = "avx512f")]
7177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7178#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7179#[rustc_legacy_const_generics(2)]
7180pub unsafe fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7181    static_assert_rounding!(ROUNDING);
7182    let a = a.as_f64x8();
7183    let b = b.as_f64x8();
7184    let r = vaddpd(a, b, ROUNDING);
7185    transmute(r)
7186}
7187
7188/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7189///
7190/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7191/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7192/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7193/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7194/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7195/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7196///
7197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7198#[inline]
7199#[target_feature(enable = "avx512f")]
7200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7201#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7202#[rustc_legacy_const_generics(4)]
7203pub unsafe fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7204    src: __m512d,
7205    k: __mmask8,
7206    a: __m512d,
7207    b: __m512d,
7208) -> __m512d {
7209    static_assert_rounding!(ROUNDING);
7210    let a = a.as_f64x8();
7211    let b = b.as_f64x8();
7212    let r = vaddpd(a, b, ROUNDING);
7213    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7214}
7215
7216/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7217///
7218/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7224///
7225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7226#[inline]
7227#[target_feature(enable = "avx512f")]
7228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7229#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7230#[rustc_legacy_const_generics(3)]
7231pub unsafe fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7232    k: __mmask8,
7233    a: __m512d,
7234    b: __m512d,
7235) -> __m512d {
7236    static_assert_rounding!(ROUNDING);
7237    let a = a.as_f64x8();
7238    let b = b.as_f64x8();
7239    let r = vaddpd(a, b, ROUNDING);
7240    transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7241}
7242
7243/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7244///
7245/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7246/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7247/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7248/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7249/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7250/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7251///
7252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7253#[inline]
7254#[target_feature(enable = "avx512f")]
7255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7256#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7257#[rustc_legacy_const_generics(2)]
7258pub unsafe fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7259    static_assert_rounding!(ROUNDING);
7260    let a = a.as_f32x16();
7261    let b = b.as_f32x16();
7262    let r = vsubps(a, b, ROUNDING);
7263    transmute(r)
7264}
7265
7266/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7267///
7268/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7269/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7270/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7271/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7272/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7273/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7274///
7275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7276#[inline]
7277#[target_feature(enable = "avx512f")]
7278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7279#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7280#[rustc_legacy_const_generics(4)]
7281pub unsafe fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7282    src: __m512,
7283    k: __mmask16,
7284    a: __m512,
7285    b: __m512,
7286) -> __m512 {
7287    static_assert_rounding!(ROUNDING);
7288    let a = a.as_f32x16();
7289    let b = b.as_f32x16();
7290    let r = vsubps(a, b, ROUNDING);
7291    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7292}
7293
7294/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7295///
7296/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7297/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7298/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7299/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7300/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7301/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7302///
7303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7304#[inline]
7305#[target_feature(enable = "avx512f")]
7306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7307#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7308#[rustc_legacy_const_generics(3)]
7309pub unsafe fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7310    k: __mmask16,
7311    a: __m512,
7312    b: __m512,
7313) -> __m512 {
7314    static_assert_rounding!(ROUNDING);
7315    let a = a.as_f32x16();
7316    let b = b.as_f32x16();
7317    let r = vsubps(a, b, ROUNDING);
7318    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7319}
7320
7321/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7322///
7323/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7324/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7325/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7326/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7327/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7328/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7329///
7330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7331#[inline]
7332#[target_feature(enable = "avx512f")]
7333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7334#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7335#[rustc_legacy_const_generics(2)]
7336pub unsafe fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7337    static_assert_rounding!(ROUNDING);
7338    let a = a.as_f64x8();
7339    let b = b.as_f64x8();
7340    let r = vsubpd(a, b, ROUNDING);
7341    transmute(r)
7342}
7343
7344/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7345///
7346/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7347/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7348/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7349/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7350/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7351/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7352///
7353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7354#[inline]
7355#[target_feature(enable = "avx512f")]
7356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7357#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7358#[rustc_legacy_const_generics(4)]
7359pub unsafe fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7360    src: __m512d,
7361    k: __mmask8,
7362    a: __m512d,
7363    b: __m512d,
7364) -> __m512d {
7365    static_assert_rounding!(ROUNDING);
7366    let a = a.as_f64x8();
7367    let b = b.as_f64x8();
7368    let r = vsubpd(a, b, ROUNDING);
7369    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7370}
7371
7372/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7373///
7374/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7375/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7376/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7377/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7378/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7379/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7380///
7381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7382#[inline]
7383#[target_feature(enable = "avx512f")]
7384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7385#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7386#[rustc_legacy_const_generics(3)]
7387pub unsafe fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7388    k: __mmask8,
7389    a: __m512d,
7390    b: __m512d,
7391) -> __m512d {
7392    static_assert_rounding!(ROUNDING);
7393    let a = a.as_f64x8();
7394    let b = b.as_f64x8();
7395    let r = vsubpd(a, b, ROUNDING);
7396    transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7397}
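
// Illustrative sketch (the helper name is ours): combining a zeromask with a
// directed rounding mode. Lanes with a clear mask bit become 0.0; the rest are
// `a - b` rounded toward negative infinity with exceptions suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_sub_round_down_example(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(k, a, b)
}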
7398
7399/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7400///
7401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7407///
7408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
7409#[inline]
7410#[target_feature(enable = "avx512f")]
7411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7412#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7413#[rustc_legacy_const_generics(2)]
7414pub unsafe fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7415    static_assert_rounding!(ROUNDING);
7416    let a = a.as_f32x16();
7417    let b = b.as_f32x16();
7418    let r = vmulps(a, b, ROUNDING);
7419    transmute(r)
7420}
7421
7422/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7423///
7424/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7425/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7426/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7427/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7428/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7429/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7430///
7431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
7432#[inline]
7433#[target_feature(enable = "avx512f")]
7434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7435#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7436#[rustc_legacy_const_generics(4)]
7437pub unsafe fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
7438    src: __m512,
7439    k: __mmask16,
7440    a: __m512,
7441    b: __m512,
7442) -> __m512 {
7443    static_assert_rounding!(ROUNDING);
7444    let a = a.as_f32x16();
7445    let b = b.as_f32x16();
7446    let r = vmulps(a, b, ROUNDING);
7447    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7448}
7449
7450/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7451///
7452/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7458///
7459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
7460#[inline]
7461#[target_feature(enable = "avx512f")]
7462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7463#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7464#[rustc_legacy_const_generics(3)]
7465pub unsafe fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
7466    k: __mmask16,
7467    a: __m512,
7468    b: __m512,
7469) -> __m512 {
7470    static_assert_rounding!(ROUNDING);
7471    let a = a.as_f32x16();
7472    let b = b.as_f32x16();
7473    let r = vmulps(a, b, ROUNDING);
7474    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7475}
7476
7477/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7478///
7479/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7480/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7481/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7482/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7483/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7484/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7485///
7486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
7487#[inline]
7488#[target_feature(enable = "avx512f")]
7489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7490#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
7491#[rustc_legacy_const_generics(2)]
7492pub unsafe fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7493    static_assert_rounding!(ROUNDING);
7494    let a = a.as_f64x8();
7495    let b = b.as_f64x8();
7496    let r = vmulpd(a, b, ROUNDING);
7497    transmute(r)
7498}
7499
7500/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7501///
7502/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7503/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7504/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7505/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7506/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7507/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7508///
7509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
7510#[inline]
7511#[target_feature(enable = "avx512f")]
7512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7513#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
7514#[rustc_legacy_const_generics(4)]
7515pub unsafe fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
7516    src: __m512d,
7517    k: __mmask8,
7518    a: __m512d,
7519    b: __m512d,
7520) -> __m512d {
7521    static_assert_rounding!(ROUNDING);
7522    let a = a.as_f64x8();
7523    let b = b.as_f64x8();
7524    let r = vmulpd(a, b, ROUNDING);
7525    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7526}
7527
7528/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7529///
7530/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7531/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7532/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7533/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7534/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7535/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7536///
7537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
7538#[inline]
7539#[target_feature(enable = "avx512f")]
7540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7541#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
7542#[rustc_legacy_const_generics(3)]
7543pub unsafe fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
7544    k: __mmask8,
7545    a: __m512d,
7546    b: __m512d,
7547) -> __m512d {
7548    static_assert_rounding!(ROUNDING);
7549    let a = a.as_f64x8();
7550    let b = b.as_f64x8();
7551    let r = vmulpd(a, b, ROUNDING);
7552    transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7553}
7554
7555/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
7556///
7557/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7558/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7559/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7560/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7561/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7562/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7563///
7564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
7565#[inline]
7566#[target_feature(enable = "avx512f")]
7567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7568#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
7569#[rustc_legacy_const_generics(2)]
7570pub unsafe fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7571    static_assert_rounding!(ROUNDING);
7572    let a = a.as_f32x16();
7573    let b = b.as_f32x16();
7574    let r = vdivps(a, b, ROUNDING);
7575    transmute(r)
7576}
7577
7578/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7579///
7580/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7581/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7582/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7583/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7584/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7585/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
7588#[inline]
7589#[target_feature(enable = "avx512f")]
7590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7591#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
7592#[rustc_legacy_const_generics(4)]
7593pub unsafe fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
7594    src: __m512,
7595    k: __mmask16,
7596    a: __m512,
7597    b: __m512,
7598) -> __m512 {
7599    static_assert_rounding!(ROUNDING);
7600    let a = a.as_f32x16();
7601    let b = b.as_f32x16();
7602    let r = vdivps(a, b, ROUNDING);
7603    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7604}
7605
7606/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7607///
7608/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7609/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7610/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7611/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7612/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7613/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7614///
7615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
7616#[inline]
7617#[target_feature(enable = "avx512f")]
7618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7619#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
7620#[rustc_legacy_const_generics(3)]
7621pub unsafe fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
7622    k: __mmask16,
7623    a: __m512,
7624    b: __m512,
7625) -> __m512 {
7626    static_assert_rounding!(ROUNDING);
7627    let a = a.as_f32x16();
7628    let b = b.as_f32x16();
7629    let r = vdivps(a, b, ROUNDING);
7630    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7631}
7632
7633/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
7634///
7635/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7636/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7637/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7638/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7639/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7640/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7641///
7642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
7643#[inline]
7644#[target_feature(enable = "avx512f")]
7645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7646#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
7647#[rustc_legacy_const_generics(2)]
7648pub unsafe fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7649    static_assert_rounding!(ROUNDING);
7650    let a = a.as_f64x8();
7651    let b = b.as_f64x8();
7652    let r = vdivpd(a, b, ROUNDING);
7653    transmute(r)
7654}
7655
7656/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7657///
7658/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7659/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7660/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7661/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7662/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7663/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7664///
7665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
7666#[inline]
7667#[target_feature(enable = "avx512f")]
7668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7669#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
7670#[rustc_legacy_const_generics(4)]
7671pub unsafe fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
7672    src: __m512d,
7673    k: __mmask8,
7674    a: __m512d,
7675    b: __m512d,
7676) -> __m512d {
7677    static_assert_rounding!(ROUNDING);
7678    let a = a.as_f64x8();
7679    let b = b.as_f64x8();
7680    let r = vdivpd(a, b, ROUNDING);
7681    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7682}
7683
7684/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7685///
7686/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7687/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7688/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7689/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7690/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7691/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7692///
7693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
7694#[inline]
7695#[target_feature(enable = "avx512f")]
7696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7697#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
7698#[rustc_legacy_const_generics(3)]
7699pub unsafe fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
7700    k: __mmask8,
7701    a: __m512d,
7702    b: __m512d,
7703) -> __m512d {
7704    static_assert_rounding!(ROUNDING);
7705    let a = a.as_f64x8();
7706    let b = b.as_f64x8();
7707    let r = vdivpd(a, b, ROUNDING);
7708    transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7709}
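
// Illustrative sketch (added annotation, not upstream code): `_MM_FROUND_CUR_DIRECTION`
// defers to the rounding control bits already programmed in `MXCSR`, so a caller who
// wants round-toward-negative-infinity division can set the global mode first.
// `k`, `a` and `b` are placeholders assumed to be provided by the caller.
//
//     unsafe {
//         _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN);
//         let q = _mm512_maskz_div_round_pd::<_MM_FROUND_CUR_DIRECTION>(k, a, b);
//     }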
7710
7711/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7712///
7713/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7714/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7715/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7716/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7717/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7718/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7719///
7720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
7721#[inline]
7722#[target_feature(enable = "avx512f")]
7723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7724#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
7725#[rustc_legacy_const_generics(1)]
7726pub unsafe fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
7727    static_assert_rounding!(ROUNDING);
7728    let a = a.as_f32x16();
7729    let r = vsqrtps(a, ROUNDING);
7730    transmute(r)
7731}
7732
7733/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7734///
7735/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7736/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7737/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7738/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7739/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7740/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7741///
7742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
7743#[inline]
7744#[target_feature(enable = "avx512f")]
7745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7746#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
7747#[rustc_legacy_const_generics(3)]
7748pub unsafe fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
7749    src: __m512,
7750    k: __mmask16,
7751    a: __m512,
7752) -> __m512 {
7753    static_assert_rounding!(ROUNDING);
7754    let a = a.as_f32x16();
7755    let r = vsqrtps(a, ROUNDING);
7756    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7757}
7758
7759/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7760///
7761/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7762/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7763/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7764/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7765/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7766/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7767///
7768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
7769#[inline]
7770#[target_feature(enable = "avx512f")]
7771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7772#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
7773#[rustc_legacy_const_generics(2)]
7774pub unsafe fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
7775    static_assert_rounding!(ROUNDING);
7776    let a = a.as_f32x16();
7777    let r = vsqrtps(a, ROUNDING);
7778    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7779}
7780
7781/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7782///
7783/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7784/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7785/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7786/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7787/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7788/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7789///
7790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
7791#[inline]
7792#[target_feature(enable = "avx512f")]
7793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7794#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
7795#[rustc_legacy_const_generics(1)]
7796pub unsafe fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
7797    static_assert_rounding!(ROUNDING);
7798    let a = a.as_f64x8();
7799    let r = vsqrtpd(a, ROUNDING);
7800    transmute(r)
7801}
7802
7803/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7804///
7805/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7806/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7807/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7808/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7809/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7810/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7811///
7812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
7813#[inline]
7814#[target_feature(enable = "avx512f")]
7815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7816#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
7817#[rustc_legacy_const_generics(3)]
7818pub unsafe fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
7819    src: __m512d,
7820    k: __mmask8,
7821    a: __m512d,
7822) -> __m512d {
7823    static_assert_rounding!(ROUNDING);
7824    let a = a.as_f64x8();
7825    let r = vsqrtpd(a, ROUNDING);
7826    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7827}
7828
7829/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7830///
7831/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7832/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7833/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7834/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7835/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7836/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7837///
7838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
7839#[inline]
7840#[target_feature(enable = "avx512f")]
7841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7842#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
7843#[rustc_legacy_const_generics(2)]
7844pub unsafe fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
7845    static_assert_rounding!(ROUNDING);
7846    let a = a.as_f64x8();
7847    let r = vsqrtpd(a, ROUNDING);
7848    transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7849}
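
// Scalar model of the masked square roots above (added annotation, illustrative only;
// the embedded rounding mode is not modelled). For each of the eight f64 lanes:
//
//     for i in 0..8 {
//         dst[i] = if (k >> i) & 1 == 1 { a[i].sqrt() } else { src[i] }; // writemask form
//         // ... or `0.0` instead of `src[i]` for the zeromask form
//     }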
7850
7851/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
7852///
7853/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7854/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7855/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7856/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7857/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7858/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7859///
7860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
7861#[inline]
7862#[target_feature(enable = "avx512f")]
7863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7864#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
7865#[rustc_legacy_const_generics(3)]
7866pub unsafe fn _mm512_fmadd_round_ps<const ROUNDING: i32>(
7867    a: __m512,
7868    b: __m512,
7869    c: __m512,
7870) -> __m512 {
7871    static_assert_rounding!(ROUNDING);
7872    vfmadd132psround(a, b, c, ROUNDING)
7873}
7874
7875/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
7876///
7877/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7878/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7879/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7880/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7881/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7882/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7883///
7884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
7885#[inline]
7886#[target_feature(enable = "avx512f")]
7887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7888#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
7889#[rustc_legacy_const_generics(4)]
7890pub unsafe fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
7891    a: __m512,
7892    k: __mmask16,
7893    b: __m512,
7894    c: __m512,
7895) -> __m512 {
7896    static_assert_rounding!(ROUNDING);
7897    simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
7898}
7899
7900/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7901///
7902/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7903/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7904/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7905/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7906/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7907/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7908///
7909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
7910#[inline]
7911#[target_feature(enable = "avx512f")]
7912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7913#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
7914#[rustc_legacy_const_generics(4)]
7915pub unsafe fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
7916    k: __mmask16,
7917    a: __m512,
7918    b: __m512,
7919    c: __m512,
7920) -> __m512 {
7921    static_assert_rounding!(ROUNDING);
7922    simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
7923}
7924
7925/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
7926///
7927/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7928/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7929/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7930/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7931/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7932/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7933///
7934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
7935#[inline]
7936#[target_feature(enable = "avx512f")]
7937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7938#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
7939#[rustc_legacy_const_generics(4)]
7940pub unsafe fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
7941    a: __m512,
7942    b: __m512,
7943    c: __m512,
7944    k: __mmask16,
7945) -> __m512 {
7946    static_assert_rounding!(ROUNDING);
7947    simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
7948}
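
// Summary of the three masked forms above (added annotation, illustrative only), with
// `r[i] = a[i] * b[i] + c[i]` computed per lane under the selected rounding mode:
//
//     _mm512_mask_fmadd_round_ps(a, k, b, c)  -> lane i: if k bit i is set { r[i] } else { a[i] }
//     _mm512_maskz_fmadd_round_ps(k, a, b, c) -> lane i: if k bit i is set { r[i] } else { 0.0 }
//     _mm512_mask3_fmadd_round_ps(a, b, c, k) -> lane i: if k bit i is set { r[i] } else { c[i] }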
7949
7950/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
7951///
7952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7958///
7959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
7960#[inline]
7961#[target_feature(enable = "avx512f")]
7962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7963#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
7964#[rustc_legacy_const_generics(3)]
7965pub unsafe fn _mm512_fmadd_round_pd<const ROUNDING: i32>(
7966    a: __m512d,
7967    b: __m512d,
7968    c: __m512d,
7969) -> __m512d {
7970    static_assert_rounding!(ROUNDING);
7971    vfmadd132pdround(a, b, c, ROUNDING)
7972}
7973
7974/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
7975///
7976/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7977/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7978/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7979/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7980/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7981/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7982///
7983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
7984#[inline]
7985#[target_feature(enable = "avx512f")]
7986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7987#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
7988#[rustc_legacy_const_generics(4)]
7989pub unsafe fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
7990    a: __m512d,
7991    k: __mmask8,
7992    b: __m512d,
7993    c: __m512d,
7994) -> __m512d {
7995    static_assert_rounding!(ROUNDING);
7996    simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
7997}
7998
7999/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8000///
8001/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8002/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8003/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8004/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8005/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8006/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8007///
8008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8009#[inline]
8010#[target_feature(enable = "avx512f")]
8011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8012#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8013#[rustc_legacy_const_generics(4)]
8014pub unsafe fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8015    k: __mmask8,
8016    a: __m512d,
8017    b: __m512d,
8018    c: __m512d,
8019) -> __m512d {
8020    static_assert_rounding!(ROUNDING);
8021    simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8022}
8023
8024/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8025///
8026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8032///
8033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8034#[inline]
8035#[target_feature(enable = "avx512f")]
8036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8037#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8038#[rustc_legacy_const_generics(4)]
8039pub unsafe fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8040    a: __m512d,
8041    b: __m512d,
8042    c: __m512d,
8043    k: __mmask8,
8044) -> __m512d {
8045    static_assert_rounding!(ROUNDING);
8046    simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8047}
8048
8049/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8050///
8051/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8052/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8053/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8054/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8055/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8056/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8057///
8058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8059#[inline]
8060#[target_feature(enable = "avx512f")]
8061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8062#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8063#[rustc_legacy_const_generics(3)]
8064pub unsafe fn _mm512_fmsub_round_ps<const ROUNDING: i32>(
8065    a: __m512,
8066    b: __m512,
8067    c: __m512,
8068) -> __m512 {
8069    static_assert_rounding!(ROUNDING);
8070    vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8071}
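
// Note on the implementation above (added annotation): the subtraction is obtained by
// negating `c` and reusing the fused-multiply-add helper, relying on the per-lane identity
//
//     a[i] * b[i] - c[i] == fma(a[i], b[i], -c[i])
//
// so no separate fused-multiply-subtract helper is needed.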
8072
8073/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8074///
8075/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8076/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8077/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8078/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8079/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8080/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8081///
8082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8083#[inline]
8084#[target_feature(enable = "avx512f")]
8085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8086#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8087#[rustc_legacy_const_generics(4)]
8088pub unsafe fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8089    a: __m512,
8090    k: __mmask16,
8091    b: __m512,
8092    c: __m512,
8093) -> __m512 {
8094    static_assert_rounding!(ROUNDING);
8095    let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8096    simd_select_bitmask(k, r, a)
8097}
8098
8099/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8100///
8101/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8102/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8103/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8104/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8105/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8106/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8107///
8108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8109#[inline]
8110#[target_feature(enable = "avx512f")]
8111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8112#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8113#[rustc_legacy_const_generics(4)]
8114pub unsafe fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8115    k: __mmask16,
8116    a: __m512,
8117    b: __m512,
8118    c: __m512,
8119) -> __m512 {
8120    static_assert_rounding!(ROUNDING);
8121    let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8122    simd_select_bitmask(k, r, _mm512_setzero_ps())
8123}
8124
8125/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8126///
8127/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8128/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8129/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8130/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8131/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8132/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8133///
8134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8135#[inline]
8136#[target_feature(enable = "avx512f")]
8137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8138#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8139#[rustc_legacy_const_generics(4)]
8140pub unsafe fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8141    a: __m512,
8142    b: __m512,
8143    c: __m512,
8144    k: __mmask16,
8145) -> __m512 {
8146    static_assert_rounding!(ROUNDING);
8147    let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
8148    simd_select_bitmask(k, r, c)
8149}
8150
8151/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8152///
8153/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8154/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8155/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8156/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8157/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8158/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8159///
8160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
8161#[inline]
8162#[target_feature(enable = "avx512f")]
8163#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8164#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8165#[rustc_legacy_const_generics(3)]
8166pub unsafe fn _mm512_fmsub_round_pd<const ROUNDING: i32>(
8167    a: __m512d,
8168    b: __m512d,
8169    c: __m512d,
8170) -> __m512d {
8171    static_assert_rounding!(ROUNDING);
8172    vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8173}
8174
8175/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8176///
8177/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8178/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8179/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8180/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8181/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8182/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8183///
8184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8185#[inline]
8186#[target_feature(enable = "avx512f")]
8187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8188#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8189#[rustc_legacy_const_generics(4)]
8190pub unsafe fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8191    a: __m512d,
8192    k: __mmask8,
8193    b: __m512d,
8194    c: __m512d,
8195) -> __m512d {
8196    static_assert_rounding!(ROUNDING);
8197    let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8198    simd_select_bitmask(k, r, a)
8199}
8200
8201/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8202///
8203/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8204/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8205/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8206/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8207/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8208/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8209///
8210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8211#[inline]
8212#[target_feature(enable = "avx512f")]
8213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8214#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8215#[rustc_legacy_const_generics(4)]
8216pub unsafe fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8217    k: __mmask8,
8218    a: __m512d,
8219    b: __m512d,
8220    c: __m512d,
8221) -> __m512d {
8222    static_assert_rounding!(ROUNDING);
8223    let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8224    simd_select_bitmask(k, r, _mm512_setzero_pd())
8225}
8226
8227/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8228///
8229/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8230/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8231/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8232/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8233/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8234/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8235///
8236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8237#[inline]
8238#[target_feature(enable = "avx512f")]
8239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8240#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8241#[rustc_legacy_const_generics(4)]
8242pub unsafe fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8243    a: __m512d,
8244    b: __m512d,
8245    c: __m512d,
8246    k: __mmask8,
8247) -> __m512d {
8248    static_assert_rounding!(ROUNDING);
8249    let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
8250    simd_select_bitmask(k, r, c)
8251}
8252
8253/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8254///
8255/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8256/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8257/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8258/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8259/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8260/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8261///
8262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
8263#[inline]
8264#[target_feature(enable = "avx512f")]
8265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8266#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8267#[rustc_legacy_const_generics(3)]
8268pub unsafe fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(
8269    a: __m512,
8270    b: __m512,
8271    c: __m512,
8272) -> __m512 {
8273    static_assert_rounding!(ROUNDING);
8274    vfmaddsubpsround(a, b, c, ROUNDING)
8275}
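
// Per-lane model of the fmaddsub result above (added annotation, illustrative only;
// the embedded rounding mode is not modelled): even lanes subtract `c`, odd lanes add it.
//
//     for i in 0..16 {
//         dst[i] = if i % 2 == 0 { a[i] * b[i] - c[i] } else { a[i] * b[i] + c[i] };
//     }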
8276
8277/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8278///
8279/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8280/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8281/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8282/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8283/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8284/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8285///
8286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8287#[inline]
8288#[target_feature(enable = "avx512f")]
8289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8290#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8291#[rustc_legacy_const_generics(4)]
8292pub unsafe fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8293    a: __m512,
8294    k: __mmask16,
8295    b: __m512,
8296    c: __m512,
8297) -> __m512 {
8298    static_assert_rounding!(ROUNDING);
8299    simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8300}
8301
8302/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8303///
8304/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8305/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8306/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8307/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8308/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8309/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8310///
8311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8312#[inline]
8313#[target_feature(enable = "avx512f")]
8314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8315#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8316#[rustc_legacy_const_generics(4)]
8317pub unsafe fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8318    k: __mmask16,
8319    a: __m512,
8320    b: __m512,
8321    c: __m512,
8322) -> __m512 {
8323    static_assert_rounding!(ROUNDING);
8324    simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8325}
8326
8327/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8328///
8329/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8330/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8331/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8332/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8333/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8334/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8335///
8336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8337#[inline]
8338#[target_feature(enable = "avx512f")]
8339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8340#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8341#[rustc_legacy_const_generics(4)]
8342pub unsafe fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8343    a: __m512,
8344    b: __m512,
8345    c: __m512,
8346    k: __mmask16,
8347) -> __m512 {
8348    static_assert_rounding!(ROUNDING);
8349    simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8350}
8351
8352/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8353///
8354/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8355/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8356/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8357/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8358/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8359/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8360///
8361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8362#[inline]
8363#[target_feature(enable = "avx512f")]
8364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8365#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8366#[rustc_legacy_const_generics(3)]
8367pub unsafe fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
8368    a: __m512d,
8369    b: __m512d,
8370    c: __m512d,
8371) -> __m512d {
8372    static_assert_rounding!(ROUNDING);
8373    vfmaddsubpdround(a, b, c, ROUNDING)
8374}
8375
8376/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8377///
8378/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8379/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8380/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8381/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8382/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8383/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8384///
8385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
8386#[inline]
8387#[target_feature(enable = "avx512f")]
8388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8389#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8390#[rustc_legacy_const_generics(4)]
8391pub unsafe fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
8392    a: __m512d,
8393    k: __mmask8,
8394    b: __m512d,
8395    c: __m512d,
8396) -> __m512d {
8397    static_assert_rounding!(ROUNDING);
8398    simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
8399}
8400
8401/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8402///
8403/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8404/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8405/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8406/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8407/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8408/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8409///
8410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
8411#[inline]
8412#[target_feature(enable = "avx512f")]
8413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8414#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8415#[rustc_legacy_const_generics(4)]
8416pub unsafe fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
8417    k: __mmask8,
8418    a: __m512d,
8419    b: __m512d,
8420    c: __m512d,
8421) -> __m512d {
8422    static_assert_rounding!(ROUNDING);
8423    simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8424}
8425
8426/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8427///
8428/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8429/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8430/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8431/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8432/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8434///
8435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
8436#[inline]
8437#[target_feature(enable = "avx512f")]
8438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8439#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
8440#[rustc_legacy_const_generics(4)]
8441pub unsafe fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
8442    a: __m512d,
8443    b: __m512d,
8444    c: __m512d,
8445    k: __mmask8,
8446) -> __m512d {
8447    static_assert_rounding!(ROUNDING);
8448    simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
8449}
8450
8451/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
8452///
8453/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8454/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8455/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8456/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8457/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8458/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8459///
8460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
8461#[inline]
8462#[target_feature(enable = "avx512f")]
8463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8464#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8465#[rustc_legacy_const_generics(3)]
8466pub unsafe fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(
8467    a: __m512,
8468    b: __m512,
8469    c: __m512,
8470) -> __m512 {
8471    static_assert_rounding!(ROUNDING);
8472    vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
8473}
8474
8475/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8476///
8477/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8478/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8479/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8480/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8481/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8482/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8483///
8484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
8485#[inline]
8486#[target_feature(enable = "avx512f")]
8487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8488#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8489#[rustc_legacy_const_generics(4)]
8490pub unsafe fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
8491    a: __m512,
8492    k: __mmask16,
8493    b: __m512,
8494    c: __m512,
8495) -> __m512 {
8496    static_assert_rounding!(ROUNDING);
8497    let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
8498    simd_select_bitmask(k, r, a)
8499}
8500
8501/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8502///
8503/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8509///
8510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
8511#[inline]
8512#[target_feature(enable = "avx512f")]
8513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8514#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8515#[rustc_legacy_const_generics(4)]
8516pub unsafe fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
8517    k: __mmask16,
8518    a: __m512,
8519    b: __m512,
8520    c: __m512,
8521) -> __m512 {
8522    static_assert_rounding!(ROUNDING);
8523    let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
8524    simd_select_bitmask(k, r, _mm512_setzero_ps())
8525}
8526
8527/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8528///
8529/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8530/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8531/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8532/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8533/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8534/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8535///
8536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
8537#[inline]
8538#[target_feature(enable = "avx512f")]
8539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8540#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
8541#[rustc_legacy_const_generics(4)]
8542pub unsafe fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
8543    a: __m512,
8544    b: __m512,
8545    c: __m512,
8546    k: __mmask16,
8547) -> __m512 {
8548    static_assert_rounding!(ROUNDING);
8549    let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
8550    simd_select_bitmask(k, r, c)
8551}
8552
8553/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
8554///
8555/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8556/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8557/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8558/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8559/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8560/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8561///
8562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
8563#[inline]
8564#[target_feature(enable = "avx512f")]
8565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8566#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
8567#[rustc_legacy_const_generics(3)]
8568pub unsafe fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
8569    a: __m512d,
8570    b: __m512d,
8571    c: __m512d,
8572) -> __m512d {
8573    static_assert_rounding!(ROUNDING);
8574    vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
8575}
8576
8577/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8578///
8579/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8580/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8581/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8582/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8583/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8584/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8585///
8586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
8587#[inline]
8588#[target_feature(enable = "avx512f")]
8589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8590#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
8591#[rustc_legacy_const_generics(4)]
8592pub unsafe fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
8593    a: __m512d,
8594    k: __mmask8,
8595    b: __m512d,
8596    c: __m512d,
8597) -> __m512d {
8598    static_assert_rounding!(ROUNDING);
8599    let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
8600    simd_select_bitmask(k, r, a)
8601}
8602
8603/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8604///
8605/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8606/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8607/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8608/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8609/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8610/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8611///
8612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
8613#[inline]
8614#[target_feature(enable = "avx512f")]
8615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8616#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
8617#[rustc_legacy_const_generics(4)]
8618pub unsafe fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
8619    k: __mmask8,
8620    a: __m512d,
8621    b: __m512d,
8622    c: __m512d,
8623) -> __m512d {
8624    static_assert_rounding!(ROUNDING);
8625    let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
8626    simd_select_bitmask(k, r, _mm512_setzero_pd())
8627}
8628
8629/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8630///
8631/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8632/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8633/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8634/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8635/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8636/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8637///
8638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
8639#[inline]
8640#[target_feature(enable = "avx512f")]
8641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8642#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
8643#[rustc_legacy_const_generics(4)]
8644pub unsafe fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
8645    a: __m512d,
8646    b: __m512d,
8647    c: __m512d,
8648    k: __mmask8,
8649) -> __m512d {
8650    static_assert_rounding!(ROUNDING);
8651    let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
8652    simd_select_bitmask(k, r, c)
8653}
8654
8655/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
8656///
8657/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8658/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8659/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8660/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8661/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8662/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8663///
8664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
8665#[inline]
8666#[target_feature(enable = "avx512f")]
8667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8668#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
8669#[rustc_legacy_const_generics(3)]
8670pub unsafe fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(
8671    a: __m512,
8672    b: __m512,
8673    c: __m512,
8674) -> __m512 {
8675    static_assert_rounding!(ROUNDING);
8676    vfmadd132psround(simd_neg(a), b, c, ROUNDING)
8677}
8678
8679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8680///
8681/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8682/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8683/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8684/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8685/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8686/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8687///
8688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
8689#[inline]
8690#[target_feature(enable = "avx512f")]
8691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8692#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
8693#[rustc_legacy_const_generics(4)]
8694pub unsafe fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
8695    a: __m512,
8696    k: __mmask16,
8697    b: __m512,
8698    c: __m512,
8699) -> __m512 {
8700    static_assert_rounding!(ROUNDING);
8701    let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
8702    simd_select_bitmask(k, r, a)
8703}
8704
8705/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8706///
8707/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8708/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8709/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8710/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8711/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8712/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8713///
8714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
8715#[inline]
8716#[target_feature(enable = "avx512f")]
8717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8718#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
8719#[rustc_legacy_const_generics(4)]
8720pub unsafe fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
8721    k: __mmask16,
8722    a: __m512,
8723    b: __m512,
8724    c: __m512,
8725) -> __m512 {
8726    static_assert_rounding!(ROUNDING);
8727    let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
8728    simd_select_bitmask(k, r, _mm512_setzero_ps())
8729}
8730
8731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8732///
8733/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8734/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8735/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8736/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8737/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8738/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8739///
8740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
8741#[inline]
8742#[target_feature(enable = "avx512f")]
8743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8744#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
8745#[rustc_legacy_const_generics(4)]
8746pub unsafe fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
8747    a: __m512,
8748    b: __m512,
8749    c: __m512,
8750    k: __mmask16,
8751) -> __m512 {
8752    static_assert_rounding!(ROUNDING);
8753    let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
8754    simd_select_bitmask(k, r, c)
8755}
8756
8757/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
8758///
8759/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8760/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8761/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8762/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8763/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8764/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8765///
8766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
8767#[inline]
8768#[target_feature(enable = "avx512f")]
8769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8770#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
8771#[rustc_legacy_const_generics(3)]
8772pub unsafe fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(
8773    a: __m512d,
8774    b: __m512d,
8775    c: __m512d,
8776) -> __m512d {
8777    static_assert_rounding!(ROUNDING);
8778    vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
8779}
8780
8781/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8782///
8783/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8784/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8785/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8786/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8787/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8788/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8789///
8790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
8791#[inline]
8792#[target_feature(enable = "avx512f")]
8793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8794#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
8795#[rustc_legacy_const_generics(4)]
8796pub unsafe fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
8797    a: __m512d,
8798    k: __mmask8,
8799    b: __m512d,
8800    c: __m512d,
8801) -> __m512d {
8802    static_assert_rounding!(ROUNDING);
8803    let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
8804    simd_select_bitmask(k, r, a)
8805}
8806
8807/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8808///
8809/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8810/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8811/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8812/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8813/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8814/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8815///
8816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
8817#[inline]
8818#[target_feature(enable = "avx512f")]
8819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8820#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
8821#[rustc_legacy_const_generics(4)]
8822pub unsafe fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
8823    k: __mmask8,
8824    a: __m512d,
8825    b: __m512d,
8826    c: __m512d,
8827) -> __m512d {
8828    static_assert_rounding!(ROUNDING);
8829    let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
8830    simd_select_bitmask(k, r, _mm512_setzero_pd())
8831}
8832
8833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8834///
8835/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8836/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8837/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8838/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8839/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8841///
8842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
8843#[inline]
8844#[target_feature(enable = "avx512f")]
8845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8846#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
8847#[rustc_legacy_const_generics(4)]
8848pub unsafe fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
8849    a: __m512d,
8850    b: __m512d,
8851    c: __m512d,
8852    k: __mmask8,
8853) -> __m512d {
8854    static_assert_rounding!(ROUNDING);
8855    let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
8856    simd_select_bitmask(k, r, c)
8857}
8858
8859/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
8860///
8861/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8862/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8863/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8864/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8865/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8866/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8867///
8868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
8869#[inline]
8870#[target_feature(enable = "avx512f")]
8871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8872#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
8873#[rustc_legacy_const_generics(3)]
8874pub unsafe fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(
8875    a: __m512,
8876    b: __m512,
8877    c: __m512,
8878) -> __m512 {
8879    static_assert_rounding!(ROUNDING);
8880    vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
8881}
8882
8883/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8884///
8885/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8886/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8887/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8888/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8889/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8890/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8891///
8892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
8893#[inline]
8894#[target_feature(enable = "avx512f")]
8895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8896#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
8897#[rustc_legacy_const_generics(4)]
8898pub unsafe fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
8899    a: __m512,
8900    k: __mmask16,
8901    b: __m512,
8902    c: __m512,
8903) -> __m512 {
8904    static_assert_rounding!(ROUNDING);
8905    let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
8906    simd_select_bitmask(k, r, a)
8907}
8908
8909/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8910///
8911/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8912/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8913/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8914/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8915/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8917///
8918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
8919#[inline]
8920#[target_feature(enable = "avx512f")]
8921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8922#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
8923#[rustc_legacy_const_generics(4)]
8924pub unsafe fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
8925    k: __mmask16,
8926    a: __m512,
8927    b: __m512,
8928    c: __m512,
8929) -> __m512 {
8930    static_assert_rounding!(ROUNDING);
8931    let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
8932    simd_select_bitmask(k, r, _mm512_setzero_ps())
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8948#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub unsafe fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
8951    a: __m512,
8952    b: __m512,
8953    c: __m512,
8954    k: __mmask16,
8955) -> __m512 {
8956    static_assert_rounding!(ROUNDING);
8957    let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
8958    simd_select_bitmask(k, r, c)
8959}
8960
8961/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
8962///
8963/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8964/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8965/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8966/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8967/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8968/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8969///
8970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
8971#[inline]
8972#[target_feature(enable = "avx512f")]
8973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8974#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
8975#[rustc_legacy_const_generics(3)]
8976pub unsafe fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(
8977    a: __m512d,
8978    b: __m512d,
8979    c: __m512d,
8980) -> __m512d {
8981    static_assert_rounding!(ROUNDING);
8982    vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
8983}
8984
8985/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8986///
8987/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8988/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8989/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8990/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8991/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8992/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8993///
8994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
8995#[inline]
8996#[target_feature(enable = "avx512f")]
8997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8998#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
8999#[rustc_legacy_const_generics(4)]
9000pub unsafe fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9001    a: __m512d,
9002    k: __mmask8,
9003    b: __m512d,
9004    c: __m512d,
9005) -> __m512d {
9006    static_assert_rounding!(ROUNDING);
9007    let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9008    simd_select_bitmask(k, r, a)
9009}
9010
9011/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9012///
9013/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9014/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9015/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9016/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9017/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9018/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9019///
9020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9021#[inline]
9022#[target_feature(enable = "avx512f")]
9023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9024#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9025#[rustc_legacy_const_generics(4)]
9026pub unsafe fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9027    k: __mmask8,
9028    a: __m512d,
9029    b: __m512d,
9030    c: __m512d,
9031) -> __m512d {
9032    static_assert_rounding!(ROUNDING);
9033    let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9034    simd_select_bitmask(k, r, _mm512_setzero_pd())
9035}
9036
9037/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9038///
9039/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9040/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9041/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9042/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9043/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9044/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9045///
9046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9047#[inline]
9048#[target_feature(enable = "avx512f")]
9049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9050#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9051#[rustc_legacy_const_generics(4)]
9052pub unsafe fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9053    a: __m512d,
9054    b: __m512d,
9055    c: __m512d,
9056    k: __mmask8,
9057) -> __m512d {
9058    static_assert_rounding!(ROUNDING);
9059    let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
9060    simd_select_bitmask(k, r, c)
9061}
9062
9063/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9064/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9065///
9066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
9067#[inline]
9068#[target_feature(enable = "avx512f")]
9069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9070#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9071#[rustc_legacy_const_generics(2)]
9072pub unsafe fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9073    static_assert_sae!(SAE);
9074    let a = a.as_f32x16();
9075    let b = b.as_f32x16();
9076    let r = vmaxps(a, b, SAE);
9077    transmute(r)
9078}
9079
9080/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9082///
9083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9084#[inline]
9085#[target_feature(enable = "avx512f")]
9086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9087#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9088#[rustc_legacy_const_generics(4)]
9089pub unsafe fn _mm512_mask_max_round_ps<const SAE: i32>(
9090    src: __m512,
9091    k: __mmask16,
9092    a: __m512,
9093    b: __m512,
9094) -> __m512 {
9095    static_assert_sae!(SAE);
9096    let a = a.as_f32x16();
9097    let b = b.as_f32x16();
9098    let r = vmaxps(a, b, SAE);
9099    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9100}
9101
9102/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9103/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9109#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9110#[rustc_legacy_const_generics(3)]
9111pub unsafe fn _mm512_maskz_max_round_ps<const SAE: i32>(
9112    k: __mmask16,
9113    a: __m512,
9114    b: __m512,
9115) -> __m512 {
9116    static_assert_sae!(SAE);
9117    let a = a.as_f32x16();
9118    let b = b.as_f32x16();
9119    let r = vmaxps(a, b, SAE);
9120    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9121}
9122
9123/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9124/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9125///
9126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9127#[inline]
9128#[target_feature(enable = "avx512f")]
9129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9130#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9131#[rustc_legacy_const_generics(2)]
9132pub unsafe fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9133    static_assert_sae!(SAE);
9134    let a = a.as_f64x8();
9135    let b = b.as_f64x8();
9136    let r = vmaxpd(a, b, SAE);
9137    transmute(r)
9138}
9139
9140/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9141/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9142///
9143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9144#[inline]
9145#[target_feature(enable = "avx512f")]
9146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9147#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9148#[rustc_legacy_const_generics(4)]
9149pub unsafe fn _mm512_mask_max_round_pd<const SAE: i32>(
9150    src: __m512d,
9151    k: __mmask8,
9152    a: __m512d,
9153    b: __m512d,
9154) -> __m512d {
9155    static_assert_sae!(SAE);
9156    let a = a.as_f64x8();
9157    let b = b.as_f64x8();
9158    let r = vmaxpd(a, b, SAE);
9159    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9160}
9161
9162/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9163/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9164///
9165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9166#[inline]
9167#[target_feature(enable = "avx512f")]
9168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9169#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9170#[rustc_legacy_const_generics(3)]
9171pub unsafe fn _mm512_maskz_max_round_pd<const SAE: i32>(
9172    k: __mmask8,
9173    a: __m512d,
9174    b: __m512d,
9175) -> __m512d {
9176    static_assert_sae!(SAE);
9177    let a = a.as_f64x8();
9178    let b = b.as_f64x8();
9179    let r = vmaxpd(a, b, SAE);
9180    transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9181}
9182
9183/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9184/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9185///
9186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
9187#[inline]
9188#[target_feature(enable = "avx512f")]
9189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9190#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9191#[rustc_legacy_const_generics(2)]
9192pub unsafe fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9193    static_assert_sae!(SAE);
9194    let a = a.as_f32x16();
9195    let b = b.as_f32x16();
9196    let r = vminps(a, b, SAE);
9197    transmute(r)
9198}
9199
9200/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9201/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9202///
9203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9204#[inline]
9205#[target_feature(enable = "avx512f")]
9206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9207#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9208#[rustc_legacy_const_generics(4)]
9209pub unsafe fn _mm512_mask_min_round_ps<const SAE: i32>(
9210    src: __m512,
9211    k: __mmask16,
9212    a: __m512,
9213    b: __m512,
9214) -> __m512 {
9215    static_assert_sae!(SAE);
9216    let a = a.as_f32x16();
9217    let b = b.as_f32x16();
9218    let r = vminps(a, b, SAE);
9219    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9220}
9221
9222/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9223/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9224///
9225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9226#[inline]
9227#[target_feature(enable = "avx512f")]
9228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9229#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9230#[rustc_legacy_const_generics(3)]
9231pub unsafe fn _mm512_maskz_min_round_ps<const SAE: i32>(
9232    k: __mmask16,
9233    a: __m512,
9234    b: __m512,
9235) -> __m512 {
9236    static_assert_sae!(SAE);
9237    let a = a.as_f32x16();
9238    let b = b.as_f32x16();
9239    let r = vminps(a, b, SAE);
9240    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9241}
9242
9243/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9245///
9246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9247#[inline]
9248#[target_feature(enable = "avx512f")]
9249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9250#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9251#[rustc_legacy_const_generics(2)]
9252pub unsafe fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9253    static_assert_sae!(SAE);
9254    let a = a.as_f64x8();
9255    let b = b.as_f64x8();
9256    let r = vminpd(a, b, SAE);
9257    transmute(r)
9258}
9259
9260/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9261/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9262///
9263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9264#[inline]
9265#[target_feature(enable = "avx512f")]
9266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9267#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9268#[rustc_legacy_const_generics(4)]
9269pub unsafe fn _mm512_mask_min_round_pd<const SAE: i32>(
9270    src: __m512d,
9271    k: __mmask8,
9272    a: __m512d,
9273    b: __m512d,
9274) -> __m512d {
9275    static_assert_sae!(SAE);
9276    let a = a.as_f64x8();
9277    let b = b.as_f64x8();
9278    let r = vminpd(a, b, SAE);
9279    transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9280}
9281
9282/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9283/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9284///
9285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9286#[inline]
9287#[target_feature(enable = "avx512f")]
9288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9289#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9290#[rustc_legacy_const_generics(3)]
9291pub unsafe fn _mm512_maskz_min_round_pd<const SAE: i32>(
9292    k: __mmask8,
9293    a: __m512d,
9294    b: __m512d,
9295) -> __m512d {
9296    static_assert_sae!(SAE);
9297    let a = a.as_f64x8();
9298    let b = b.as_f64x8();
9299    let r = vminpd(a, b, SAE);
9300    transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9301}
9302
9303/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9304/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9305///
9306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
9307#[inline]
9308#[target_feature(enable = "avx512f")]
9309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9310#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9311#[rustc_legacy_const_generics(1)]
9312pub unsafe fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9313    static_assert_sae!(SAE);
9314    let a = a.as_f32x16();
9315    let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
9316    transmute(r)
9317}
9318
9319/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
9320/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9321///
9322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
9323#[inline]
9324#[target_feature(enable = "avx512f")]
9325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9326#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9327#[rustc_legacy_const_generics(3)]
9328pub unsafe fn _mm512_mask_getexp_round_ps<const SAE: i32>(
9329    src: __m512,
9330    k: __mmask16,
9331    a: __m512,
9332) -> __m512 {
9333    static_assert_sae!(SAE);
9334    let a = a.as_f32x16();
9335    let src = src.as_f32x16();
9336    let r = vgetexpps(a, src, k, SAE);
9337    transmute(r)
9338}
9339
9340/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
9341/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9347#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9348#[rustc_legacy_const_generics(2)]
9349pub unsafe fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
9350    static_assert_sae!(SAE);
9351    let a = a.as_f32x16();
9352    let r = vgetexpps(a, f32x16::ZERO, k, SAE);
9353    transmute(r)
9354}
9355
9356/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9357/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9358///
9359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
9360#[inline]
9361#[target_feature(enable = "avx512f")]
9362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9363#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
9364#[rustc_legacy_const_generics(1)]
9365pub unsafe fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
9366    static_assert_sae!(SAE);
9367    let a = a.as_f64x8();
9368    let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
9369    transmute(r)
9370}
9371
9372/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
9373/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9374///
9375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
9376#[inline]
9377#[target_feature(enable = "avx512f")]
9378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9379#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
9380#[rustc_legacy_const_generics(3)]
9381pub unsafe fn _mm512_mask_getexp_round_pd<const SAE: i32>(
9382    src: __m512d,
9383    k: __mmask8,
9384    a: __m512d,
9385) -> __m512d {
9386    static_assert_sae!(SAE);
9387    let a = a.as_f64x8();
9388    let src = src.as_f64x8();
9389    let r = vgetexppd(a, src, k, SAE);
9390    transmute(r)
9391}
9392
9393/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
9394/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9395///
9396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
9397#[inline]
9398#[target_feature(enable = "avx512f")]
9399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9400#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
9401#[rustc_legacy_const_generics(2)]
9402pub unsafe fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
9403    static_assert_sae!(SAE);
9404    let a = a.as_f64x8();
9405    let r = vgetexppd(a, f64x8::ZERO, k, SAE);
9406    transmute(r)
9407}
9408
9409/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
9410/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
9411/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9412/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9413/// * [`_MM_FROUND_TO_POS_INF`] : round up
9414/// * [`_MM_FROUND_TO_ZERO`] : truncate
9415/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9416///
9417/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
9419#[inline]
9420#[target_feature(enable = "avx512f")]
9421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9422#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
9423#[rustc_legacy_const_generics(1, 2)]
9424pub unsafe fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
9425    static_assert_uimm_bits!(IMM8, 8);
9426    static_assert_mantissas_sae!(SAE);
9427    let a = a.as_f32x16();
9428    let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
9429    transmute(r)
9430}
9431
9432/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9433/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
9434/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9435/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9436/// * [`_MM_FROUND_TO_POS_INF`] : round up
9437/// * [`_MM_FROUND_TO_ZERO`] : truncate
9438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9439///
9440/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
9442#[inline]
9443#[target_feature(enable = "avx512f")]
9444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9445#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
9446#[rustc_legacy_const_generics(3, 4)]
9447pub unsafe fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
9448    src: __m512,
9449    k: __mmask16,
9450    a: __m512,
9451) -> __m512 {
9452    static_assert_uimm_bits!(IMM8, 8);
9453    static_assert_mantissas_sae!(SAE);
9454    let a = a.as_f32x16();
9455    let src = src.as_f32x16();
9456    let r = vrndscaleps(a, IMM8, src, k, SAE);
9457    transmute(r)
9458}
9459
9460/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9461/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
9462/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9463/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9464/// * [`_MM_FROUND_TO_POS_INF`] : round up
9465/// * [`_MM_FROUND_TO_ZERO`] : truncate
9466/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9467///
9468/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
9470#[inline]
9471#[target_feature(enable = "avx512f")]
9472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9473#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
9474#[rustc_legacy_const_generics(2, 3)]
9475pub unsafe fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
9476    k: __mmask16,
9477    a: __m512,
9478) -> __m512 {
9479    static_assert_uimm_bits!(IMM8, 8);
9480    static_assert_mantissas_sae!(SAE);
9481    let a = a.as_f32x16();
9482    let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
9483    transmute(r)
9484}
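
// Illustrative usage sketch (not part of the crate). Per Intel's VRNDSCALE
// encoding, imm8[7:4] holds the number of fraction bits M to keep and the low
// bits select the rounding mode, so the result is round(x * 2^M) / 2^M.
// Assumes an `unsafe` context on a CPU with AVX-512F.
//
//     let a = _mm512_set1_ps(1.3);
//     // IMM8 = 0x10: M = 1 fraction bit, round to nearest,
//     // so every lane becomes round(1.3 * 2) / 2 = 1.5
//     let r = _mm512_roundscale_round_ps::<0x10, _MM_FROUND_NO_EXC>(a);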
9485
9486/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst.\
9487/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
9488/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9489/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9490/// * [`_MM_FROUND_TO_POS_INF`] : round up
9491/// * [`_MM_FROUND_TO_ZERO`] : truncate
9492/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9493///
9494/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
9496#[inline]
9497#[target_feature(enable = "avx512f")]
9498#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9499#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
9500#[rustc_legacy_const_generics(1, 2)]
9501pub unsafe fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
9502    static_assert_uimm_bits!(IMM8, 8);
9503    static_assert_mantissas_sae!(SAE);
9504    let a = a.as_f64x8();
9505    let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
9506    transmute(r)
9507}
9508
9509/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9510/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
9511/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9512/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9513/// * [`_MM_FROUND_TO_POS_INF`] : round up
9514/// * [`_MM_FROUND_TO_ZERO`] : truncate
9515/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9516///
9517/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
9519#[inline]
9520#[target_feature(enable = "avx512f")]
9521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9522#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
9523#[rustc_legacy_const_generics(3, 4)]
9524pub unsafe fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
9525    src: __m512d,
9526    k: __mmask8,
9527    a: __m512d,
9528) -> __m512d {
9529    static_assert_uimm_bits!(IMM8, 8);
9530    static_assert_mantissas_sae!(SAE);
9531    let a = a.as_f64x8();
9532    let src = src.as_f64x8();
9533    let r = vrndscalepd(a, IMM8, src, k, SAE);
9534    transmute(r)
9535}
9536
9537/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8\[7:4\], and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9538/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
9539/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
9540/// * [`_MM_FROUND_TO_NEG_INF`] : round down
9541/// * [`_MM_FROUND_TO_POS_INF`] : round up
9542/// * [`_MM_FROUND_TO_ZERO`] : truncate
9543/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9544///
9545/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
9547#[inline]
9548#[target_feature(enable = "avx512f")]
9549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9550#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
9551#[rustc_legacy_const_generics(2, 3)]
9552pub unsafe fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
9553    k: __mmask8,
9554    a: __m512d,
9555) -> __m512d {
9556    static_assert_uimm_bits!(IMM8, 8);
9557    static_assert_mantissas_sae!(SAE);
9558    let a = a.as_f64x8();
9559    let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
9560    transmute(r)
9561}
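
// Illustrative masked-use sketch (not part of the crate), assuming an `unsafe`
// context on a CPU with AVX-512F:
//
//     let src = _mm512_set1_pd(9.0);
//     let a = _mm512_set1_pd(1.25);
//     // IMM8 = 0: round to the nearest integer; only the low four lanes are
//     // written, the remaining lanes are copied from `src`
//     let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_NO_EXC>(src, 0b0000_1111, a);
//     // lanes 0..=3 hold 1.0, lanes 4..=7 hold 9.0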
9562
9563/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b (each element is computed as a * 2^floor(b)), and store the results in dst.\
9564///
9565/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9566/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9567/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9568/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9569/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9570/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9571///
9572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
9573#[inline]
9574#[target_feature(enable = "avx512f")]
9575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9576#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
9577#[rustc_legacy_const_generics(2)]
9578pub unsafe fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
9579    static_assert_rounding!(ROUNDING);
9580    let a = a.as_f32x16();
9581    let b = b.as_f32x16();
9582    let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
9583    transmute(r)
9584}
9585
9586/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9587///
9588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9594///
9595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
9596#[inline]
9597#[target_feature(enable = "avx512f")]
9598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9599#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
9600#[rustc_legacy_const_generics(4)]
9601pub unsafe fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
9602    src: __m512,
9603    k: __mmask16,
9604    a: __m512,
9605    b: __m512,
9606) -> __m512 {
9607    static_assert_rounding!(ROUNDING);
9608    let a = a.as_f32x16();
9609    let b = b.as_f32x16();
9610    let src = src.as_f32x16();
9611    let r = vscalefps(a, b, src, k, ROUNDING);
9612    transmute(r)
9613}
9614
9615/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9616///
9617/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9623///
9624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
9625#[inline]
9626#[target_feature(enable = "avx512f")]
9627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9628#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
9629#[rustc_legacy_const_generics(3)]
9630pub unsafe fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
9631    k: __mmask16,
9632    a: __m512,
9633    b: __m512,
9634) -> __m512 {
9635    static_assert_rounding!(ROUNDING);
9636    let a = a.as_f32x16();
9637    let b = b.as_f32x16();
9638    let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
9639    transmute(r)
9640}
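
// Illustrative usage sketch (not part of the crate): scalef computes
// a * 2^floor(b) per element. Assumes an `unsafe` context on a CPU with
// AVX-512F.
//
//     let a = _mm512_set1_ps(3.0);
//     let b = _mm512_set1_ps(2.0);
//     // every lane becomes 3.0 * 2^2 = 12.0
//     let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);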
9641
9642/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b (each element is computed as a * 2^floor(b)), and store the results in dst.\
9643///
9644/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9645/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9646/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9647/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9648/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9649/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9650///
9651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
9652#[inline]
9653#[target_feature(enable = "avx512f")]
9654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9655#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
9656#[rustc_legacy_const_generics(2)]
9657pub unsafe fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
9658    static_assert_rounding!(ROUNDING);
9659    let a = a.as_f64x8();
9660    let b = b.as_f64x8();
9661    let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
9662    transmute(r)
9663}
9664
9665/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9666///
9667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9673///
9674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
9675#[inline]
9676#[target_feature(enable = "avx512f")]
9677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9678#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
9679#[rustc_legacy_const_generics(4)]
9680pub unsafe fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
9681    src: __m512d,
9682    k: __mmask8,
9683    a: __m512d,
9684    b: __m512d,
9685) -> __m512d {
9686    static_assert_rounding!(ROUNDING);
9687    let a = a.as_f64x8();
9688    let b = b.as_f64x8();
9689    let src = src.as_f64x8();
9690    let r = vscalefpd(a, b, src, k, ROUNDING);
9691    transmute(r)
9692}
9693
9694/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9695///
9696/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9697/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9698/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9699/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9700/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9701/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9702///
9703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
9704#[inline]
9705#[target_feature(enable = "avx512f")]
9706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9707#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
9708#[rustc_legacy_const_generics(3)]
9709pub unsafe fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
9710    k: __mmask8,
9711    a: __m512d,
9712    b: __m512d,
9713) -> __m512d {
9714    static_assert_rounding!(ROUNDING);
9715    let a = a.as_f64x8();
9716    let b = b.as_f64x8();
9717    let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
9718    transmute(r)
9719}
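
// Illustrative zeromask sketch (not part of the crate), assuming an `unsafe`
// context on a CPU with AVX-512F:
//
//     let a = _mm512_set1_pd(1.5);
//     let b = _mm512_set1_pd(3.0);
//     // lanes with a clear mask bit are zeroed, the rest hold 1.5 * 2^3 = 12.0
//     let r = _mm512_maskz_scalef_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b1111_0000, a, b);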
9720
9721/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
9722///
9723/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
9725#[inline]
9726#[target_feature(enable = "avx512f")]
9727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9728#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
9729#[rustc_legacy_const_generics(3, 4)]
9730pub unsafe fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
9731    a: __m512,
9732    b: __m512,
9733    c: __m512i,
9734) -> __m512 {
9735    static_assert_uimm_bits!(IMM8, 8);
9736    static_assert_mantissas_sae!(SAE);
9737    let a = a.as_f32x16();
9738    let b = b.as_f32x16();
9739    let c = c.as_i32x16();
9740    let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
9741    transmute(r)
9742}
9743
9744/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
9745///
9746/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
9748#[inline]
9749#[target_feature(enable = "avx512f")]
9750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9751#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
9752#[rustc_legacy_const_generics(4, 5)]
9753pub unsafe fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
9754    a: __m512,
9755    k: __mmask16,
9756    b: __m512,
9757    c: __m512i,
9758) -> __m512 {
9759    static_assert_uimm_bits!(IMM8, 8);
9760    static_assert_mantissas_sae!(SAE);
9761    let a = a.as_f32x16();
9762    let b = b.as_f32x16();
9763    let c = c.as_i32x16();
9764    let r = vfixupimmps(a, b, c, IMM8, k, SAE);
9765    transmute(r)
9766}
9767
9768/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
9769///
9770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
9772#[inline]
9773#[target_feature(enable = "avx512f")]
9774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9775#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
9776#[rustc_legacy_const_generics(4, 5)]
9777pub unsafe fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
9778    k: __mmask16,
9779    a: __m512,
9780    b: __m512,
9781    c: __m512i,
9782) -> __m512 {
9783    static_assert_uimm_bits!(IMM8, 8);
9784    static_assert_mantissas_sae!(SAE);
9785    let a = a.as_f32x16();
9786    let b = b.as_f32x16();
9787    let c = c.as_i32x16();
9788    let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
9789    transmute(r)
9790}
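
// Rough sketch of the fixup mechanism (illustrative, not part of the crate;
// see Intel's VFIXUPIMM description for the full token/response encoding):
// each element of `b` is classified (zero, NaN, infinity, ...), the matching
// 4-bit entry of the table `c` selects a response, and `imm8` controls which
// exception flags may be raised. An all-zero table keeps the element from `a`:
//
//     let a = _mm512_set1_ps(7.0);
//     let b = _mm512_setzero_ps();
//     let c = _mm512_setzero_si512();
//     // response 0b0000 passes `a` through, so every lane stays 7.0
//     let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);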
9791
9792/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
9793///
9794/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
9796#[inline]
9797#[target_feature(enable = "avx512f")]
9798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9799#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
9800#[rustc_legacy_const_generics(3, 4)]
9801pub unsafe fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
9802    a: __m512d,
9803    b: __m512d,
9804    c: __m512i,
9805) -> __m512d {
9806    static_assert_uimm_bits!(IMM8, 8);
9807    static_assert_mantissas_sae!(SAE);
9808    let a = a.as_f64x8();
9809    let b = b.as_f64x8();
9810    let c = c.as_i64x8();
9811    let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
9812    transmute(r)
9813}
9814
9815/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
9816///
9817/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
9819#[inline]
9820#[target_feature(enable = "avx512f")]
9821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9822#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
9823#[rustc_legacy_const_generics(4, 5)]
9824pub unsafe fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
9825    a: __m512d,
9826    k: __mmask8,
9827    b: __m512d,
9828    c: __m512i,
9829) -> __m512d {
9830    static_assert_uimm_bits!(IMM8, 8);
9831    static_assert_mantissas_sae!(SAE);
9832    let a = a.as_f64x8();
9833    let b = b.as_f64x8();
9834    let c = c.as_i64x8();
9835    let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
9836    transmute(r)
9837}
9838
9839/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
9840///
9841/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
9843#[inline]
9844#[target_feature(enable = "avx512f")]
9845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9846#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
9847#[rustc_legacy_const_generics(4, 5)]
9848pub unsafe fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
9849    k: __mmask8,
9850    a: __m512d,
9851    b: __m512d,
9852    c: __m512i,
9853) -> __m512d {
9854    static_assert_uimm_bits!(IMM8, 8);
9855    static_assert_mantissas_sae!(SAE);
9856    let a = a.as_f64x8();
9857    let b = b.as_f64x8();
9858    let c = c.as_i64x8();
9859    let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
9860    transmute(r)
9861}
9862
9863/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
9864/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
9865///    _MM_MANT_NORM_1_2     // interval [1, 2)\
9866///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
9867///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
9868///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
9869/// The sign is determined by sc which can take the following values:\
9870///    _MM_MANT_SIGN_src     // sign = sign(src)\
9871///    _MM_MANT_SIGN_zero    // sign = 0\
9872///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
9873/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9874///
9875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
9876#[inline]
9877#[target_feature(enable = "avx512f")]
9878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9879#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
9880#[rustc_legacy_const_generics(1, 2, 3)]
9881pub unsafe fn _mm512_getmant_round_ps<
9882    const NORM: _MM_MANTISSA_NORM_ENUM,
9883    const SIGN: _MM_MANTISSA_SIGN_ENUM,
9884    const SAE: i32,
9885>(
9886    a: __m512,
9887) -> __m512 {
9888    static_assert_uimm_bits!(NORM, 4);
9889    static_assert_uimm_bits!(SIGN, 2);
9890    static_assert_mantissas_sae!(SAE);
9891    let a = a.as_f32x16();
9892    let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
9893    transmute(r)
9894}
9895
9896/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
9897/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
9898///    _MM_MANT_NORM_1_2     // interval [1, 2)\
9899///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
9900///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
9901///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
9902/// The sign is determined by sc which can take the following values:\
9903///    _MM_MANT_SIGN_src     // sign = sign(src)\
9904///    _MM_MANT_SIGN_zero    // sign = 0\
9905///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
9906/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9907///
9908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
9909#[inline]
9910#[target_feature(enable = "avx512f")]
9911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9912#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
9913#[rustc_legacy_const_generics(3, 4, 5)]
9914pub unsafe fn _mm512_mask_getmant_round_ps<
9915    const NORM: _MM_MANTISSA_NORM_ENUM,
9916    const SIGN: _MM_MANTISSA_SIGN_ENUM,
9917    const SAE: i32,
9918>(
9919    src: __m512,
9920    k: __mmask16,
9921    a: __m512,
9922) -> __m512 {
9923    static_assert_uimm_bits!(NORM, 4);
9924    static_assert_uimm_bits!(SIGN, 2);
9925    static_assert_mantissas_sae!(SAE);
9926    let a = a.as_f32x16();
9927    let src = src.as_f32x16();
9928    let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
9929    transmute(r)
9930}
9931
9932/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
9933/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
9934///    _MM_MANT_NORM_1_2     // interval [1, 2)\
9935///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
9936///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
9937///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
9938/// The sign is determined by sc which can take the following values:\
9939///    _MM_MANT_SIGN_src     // sign = sign(src)\
9940///    _MM_MANT_SIGN_zero    // sign = 0\
9941///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9948#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
9949#[rustc_legacy_const_generics(2, 3, 4)]
9950pub unsafe fn _mm512_maskz_getmant_round_ps<
9951    const NORM: _MM_MANTISSA_NORM_ENUM,
9952    const SIGN: _MM_MANTISSA_SIGN_ENUM,
9953    const SAE: i32,
9954>(
9955    k: __mmask16,
9956    a: __m512,
9957) -> __m512 {
9958    static_assert_uimm_bits!(NORM, 4);
9959    static_assert_uimm_bits!(SIGN, 2);
9960    static_assert_mantissas_sae!(SAE);
9961    let a = a.as_f32x16();
9962    let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
9963    transmute(r)
9964}
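
// Illustrative usage sketch (not part of the crate), assuming an `unsafe`
// context on a CPU with AVX-512F; the interval/sign selector constants are
// assumed to be spelled as defined elsewhere in this module.
//
//     let a = _mm512_set1_ps(10.0);
//     // normalize every mantissa to [1, 2) and keep the source sign:
//     // 10.0 = 1.25 * 2^3, so every lane becomes 1.25
//     let r = _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_NO_EXC>(a);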
9965
9966/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
9967/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
9968///    _MM_MANT_NORM_1_2     // interval [1, 2)\
9969///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
9970///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
9971///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
9972/// The sign is determined by sc which can take the following values:\
9973///    _MM_MANT_SIGN_src     // sign = sign(src)\
9974///    _MM_MANT_SIGN_zero    // sign = 0\
9975///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
9976/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9977///
9978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
9979#[inline]
9980#[target_feature(enable = "avx512f")]
9981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9982#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
9983#[rustc_legacy_const_generics(1, 2, 3)]
9984pub unsafe fn _mm512_getmant_round_pd<
9985    const NORM: _MM_MANTISSA_NORM_ENUM,
9986    const SIGN: _MM_MANTISSA_SIGN_ENUM,
9987    const SAE: i32,
9988>(
9989    a: __m512d,
9990) -> __m512d {
9991    static_assert_uimm_bits!(NORM, 4);
9992    static_assert_uimm_bits!(SIGN, 2);
9993    static_assert_mantissas_sae!(SAE);
9994    let a = a.as_f64x8();
9995    let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
9996    transmute(r)
9997}
9998
9999/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10000/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10001///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10002///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10003///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10004///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10005/// The sign is determined by sc which can take the following values:\
10006///    _MM_MANT_SIGN_src     // sign = sign(src)\
10007///    _MM_MANT_SIGN_zero    // sign = 0\
10008///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10009/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10010///
10011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10012#[inline]
10013#[target_feature(enable = "avx512f")]
10014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10015#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10016#[rustc_legacy_const_generics(3, 4, 5)]
10017pub unsafe fn _mm512_mask_getmant_round_pd<
10018    const NORM: _MM_MANTISSA_NORM_ENUM,
10019    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10020    const SAE: i32,
10021>(
10022    src: __m512d,
10023    k: __mmask8,
10024    a: __m512d,
10025) -> __m512d {
10026    static_assert_uimm_bits!(NORM, 4);
10027    static_assert_uimm_bits!(SIGN, 2);
10028    static_assert_mantissas_sae!(SAE);
10029    let a = a.as_f64x8();
10030    let src = src.as_f64x8();
10031    let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
10032    transmute(r)
10033}
10034
10035/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10036/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10037///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10038///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10039///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10040///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10041/// The sign is determined by sc which can take the following values:\
10042///    _MM_MANT_SIGN_src     // sign = sign(src)\
10043///    _MM_MANT_SIGN_zero    // sign = 0\
10044///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10045/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10046///
10047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10048#[inline]
10049#[target_feature(enable = "avx512f")]
10050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10051#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10052#[rustc_legacy_const_generics(2, 3, 4)]
10053pub unsafe fn _mm512_maskz_getmant_round_pd<
10054    const NORM: _MM_MANTISSA_NORM_ENUM,
10055    const SIGN: _MM_MANTISSA_SIGN_ENUM,
10056    const SAE: i32,
10057>(
10058    k: __mmask8,
10059    a: __m512d,
10060) -> __m512d {
10061    static_assert_uimm_bits!(NORM, 4);
10062    static_assert_uimm_bits!(SIGN, 2);
10063    static_assert_mantissas_sae!(SAE);
10064    let a = a.as_f64x8();
10065    let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
10066    transmute(r)
10067}
10068
10069/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10070///
10071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)   
10072#[inline]
10073#[target_feature(enable = "avx512f")]
10074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10075#[cfg_attr(test, assert_instr(vcvtps2dq))]
10076pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10077    transmute(vcvtps2dq(
10078        a.as_f32x16(),
10079        i32x16::ZERO,
10080        0b11111111_11111111,
10081        _MM_FROUND_CUR_DIRECTION,
10082    ))
10083}
10084
10085/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10086///
10087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10088#[inline]
10089#[target_feature(enable = "avx512f")]
10090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10091#[cfg_attr(test, assert_instr(vcvtps2dq))]
10092pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10093    transmute(vcvtps2dq(
10094        a.as_f32x16(),
10095        src.as_i32x16(),
10096        k,
10097        _MM_FROUND_CUR_DIRECTION,
10098    ))
10099}
10100
10101/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10102///
10103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10104#[inline]
10105#[target_feature(enable = "avx512f")]
10106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10107#[cfg_attr(test, assert_instr(vcvtps2dq))]
10108pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10109    transmute(vcvtps2dq(
10110        a.as_f32x16(),
10111        i32x16::ZERO,
10112        k,
10113        _MM_FROUND_CUR_DIRECTION,
10114    ))
10115}
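
// Illustrative usage sketch (not part of the crate), assuming an `unsafe`
// context on a CPU with AVX-512F and the default MXCSR rounding mode
// (round to nearest, ties to even):
//
//     let a = _mm512_set1_ps(1.5);
//     // 1.5 converts to 2 in every lane
//     let r = _mm512_cvtps_epi32(a);
//     // zeromask: lanes whose mask bit is clear become 0
//     let rz = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);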
10116
10117/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10118///
10119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
10120#[inline]
10121#[target_feature(enable = "avx512f,avx512vl")]
10122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10123#[cfg_attr(test, assert_instr(vcvtps2dq))]
10124pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10125    let convert = _mm256_cvtps_epi32(a);
10126    transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10127}
10128
10129/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10130///
10131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10132#[inline]
10133#[target_feature(enable = "avx512f,avx512vl")]
10134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10135#[cfg_attr(test, assert_instr(vcvtps2dq))]
10136pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10137    let convert = _mm256_cvtps_epi32(a);
10138    transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10139}
10140
10141/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10142///
10143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10144#[inline]
10145#[target_feature(enable = "avx512f,avx512vl")]
10146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10147#[cfg_attr(test, assert_instr(vcvtps2dq))]
10148pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10149    let convert = _mm_cvtps_epi32(a);
10150    transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10151}
10152
10153/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10154///
10155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10156#[inline]
10157#[target_feature(enable = "avx512f,avx512vl")]
10158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10159#[cfg_attr(test, assert_instr(vcvtps2dq))]
10160pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10161    let convert = _mm_cvtps_epi32(a);
10162    transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10163}
10164
10165/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10166///    
10167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
10168#[inline]
10169#[target_feature(enable = "avx512f")]
10170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10171#[cfg_attr(test, assert_instr(vcvtps2udq))]
10172pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10173    transmute(vcvtps2udq(
10174        a.as_f32x16(),
10175        u32x16::ZERO,
10176        0b11111111_11111111,
10177        _MM_FROUND_CUR_DIRECTION,
10178    ))
10179}
10180
10181/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10182///    
10183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10184#[inline]
10185#[target_feature(enable = "avx512f")]
10186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10187#[cfg_attr(test, assert_instr(vcvtps2udq))]
10188pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10189    transmute(vcvtps2udq(
10190        a.as_f32x16(),
10191        src.as_u32x16(),
10192        k,
10193        _MM_FROUND_CUR_DIRECTION,
10194    ))
10195}
10196
10197/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10198///    
10199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10200#[inline]
10201#[target_feature(enable = "avx512f")]
10202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10203#[cfg_attr(test, assert_instr(vcvtps2udq))]
10204pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10205    transmute(vcvtps2udq(
10206        a.as_f32x16(),
10207        u32x16::ZERO,
10208        k,
10209        _MM_FROUND_CUR_DIRECTION,
10210    ))
10211}
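
// Illustrative usage sketch (not part of the crate), assuming an `unsafe`
// context on a CPU with AVX-512F and the default MXCSR rounding mode:
//
//     let a = _mm512_set1_ps(7.9);
//     // 7.9 rounds to the unsigned integer 8 in every lane
//     let r = _mm512_cvtps_epu32(a);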
10212
10213/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10214///    
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10216#[inline]
10217#[target_feature(enable = "avx512f,avx512vl")]
10218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10219#[cfg_attr(test, assert_instr(vcvtps2udq))]
10220pub unsafe fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
10221    transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111))
10222}
10223
10224/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10225///    
10226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10227#[inline]
10228#[target_feature(enable = "avx512f,avx512vl")]
10229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10230#[cfg_attr(test, assert_instr(vcvtps2udq))]
10231pub unsafe fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10232    transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k))
10233}
10234
10235/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10236///    
10237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10238#[inline]
10239#[target_feature(enable = "avx512f,avx512vl")]
10240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10241#[cfg_attr(test, assert_instr(vcvtps2udq))]
10242pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
10243    transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k))
10244}
10245
10246/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10247///    
10248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
10249#[inline]
10250#[target_feature(enable = "avx512f,avx512vl")]
10251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10252#[cfg_attr(test, assert_instr(vcvtps2udq))]
10253pub unsafe fn _mm_cvtps_epu32(a: __m128) -> __m128i {
10254    transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111))
10255}
10256
10257/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10258///    
10259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
10260#[inline]
10261#[target_feature(enable = "avx512f,avx512vl")]
10262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10263#[cfg_attr(test, assert_instr(vcvtps2udq))]
10264pub unsafe fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10265    transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k))
10266}
10267
10268/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10269///    
10270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
10271#[inline]
10272#[target_feature(enable = "avx512f,avx512vl")]
10273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10274#[cfg_attr(test, assert_instr(vcvtps2udq))]
10275pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
10276    transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k))
10277}
10278
10279/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
10280///
10281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
10282#[inline]
10283#[target_feature(enable = "avx512f")]
10284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10285#[cfg_attr(test, assert_instr(vcvtps2pd))]
10286pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d {
10287    transmute(vcvtps2pd(
10288        a.as_f32x8(),
10289        f64x8::ZERO,
10290        0b11111111,
10291        _MM_FROUND_CUR_DIRECTION,
10292    ))
10293}
10294
10295/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10296///
10297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
10298#[inline]
10299#[target_feature(enable = "avx512f")]
10300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10301#[cfg_attr(test, assert_instr(vcvtps2pd))]
10302pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
10303    transmute(vcvtps2pd(
10304        a.as_f32x8(),
10305        src.as_f64x8(),
10306        k,
10307        _MM_FROUND_CUR_DIRECTION,
10308    ))
10309}
10310
10311/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10312///
10313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
10314#[inline]
10315#[target_feature(enable = "avx512f")]
10316#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10317#[cfg_attr(test, assert_instr(vcvtps2pd))]
10318pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
10319    transmute(vcvtps2pd(
10320        a.as_f32x8(),
10321        f64x8::ZERO,
10322        k,
10323        _MM_FROUND_CUR_DIRECTION,
10324    ))
10325}
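
// Illustrative usage sketch (not part of the crate), assuming an `unsafe`
// context on a CPU with AVX-512F. Widening f32 -> f64 is exact: every f32
// value is representable as an f64, so no rounding occurs.
//
//     let a = _mm256_set1_ps(0.25);
//     let r = _mm512_cvtps_pd(a); // every f64 lane holds exactly 0.25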
10326
10327/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10333#[cfg_attr(test, assert_instr(vcvtps2pd))]
10334pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
10335    transmute(vcvtps2pd(
10336        _mm512_castps512_ps256(v2).as_f32x8(),
10337        f64x8::ZERO,
10338        0b11111111,
10339        _MM_FROUND_CUR_DIRECTION,
10340    ))
10341}
10342
10343/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10344///
10345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
10346#[inline]
10347#[target_feature(enable = "avx512f")]
10348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10349#[cfg_attr(test, assert_instr(vcvtps2pd))]
10350pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
10351    transmute(vcvtps2pd(
10352        _mm512_castps512_ps256(v2).as_f32x8(),
10353        src.as_f64x8(),
10354        k,
10355        _MM_FROUND_CUR_DIRECTION,
10356    ))
10357}
10358
10359/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
10360///
10361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
10362#[inline]
10363#[target_feature(enable = "avx512f")]
10364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10365#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10366pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
10367    transmute(vcvtpd2ps(
10368        a.as_f64x8(),
10369        f32x8::ZERO,
10370        0b11111111,
10371        _MM_FROUND_CUR_DIRECTION,
10372    ))
10373}
10374
10375/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10376///
10377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
10378#[inline]
10379#[target_feature(enable = "avx512f")]
10380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10381#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10382pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
10383    transmute(vcvtpd2ps(
10384        a.as_f64x8(),
10385        src.as_f32x8(),
10386        k,
10387        _MM_FROUND_CUR_DIRECTION,
10388    ))
10389}
10390
10391/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10392///
10393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
10394#[inline]
10395#[target_feature(enable = "avx512f")]
10396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10397#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10398pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
10399    transmute(vcvtpd2ps(
10400        a.as_f64x8(),
10401        f32x8::ZERO,
10402        k,
10403        _MM_FROUND_CUR_DIRECTION,
10404    ))
10405}
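
// Illustrative usage sketch (not part of the crate), assuming an `unsafe`
// context on a CPU with AVX-512F. Narrowing f64 -> f32 rounds according to
// MXCSR and overflows to infinity for values outside the f32 range:
//
//     let a = _mm512_set1_pd(1.0e40);
//     let r = _mm512_cvtpd_ps(a); // every f32 lane of the __m256 result is +inf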
10406
10407/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10408///
10409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
10410#[inline]
10411#[target_feature(enable = "avx512f,avx512vl")]
10412#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10413#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10414pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
10415    let convert = _mm256_cvtpd_ps(a);
10416    transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
10417}
10418
10419/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10420///
10421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
10422#[inline]
10423#[target_feature(enable = "avx512f,avx512vl")]
10424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10425#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10426pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
10427    let convert = _mm256_cvtpd_ps(a);
10428    transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
10429}
10430
10431/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10432///
10433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
10434#[inline]
10435#[target_feature(enable = "avx512f,avx512vl")]
10436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10437#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10438pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
10439    let convert = _mm_cvtpd_ps(a);
10440    transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
10441}
10442
10443/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10444///
10445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
10446#[inline]
10447#[target_feature(enable = "avx512f,avx512vl")]
10448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10449#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10450pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
10451    let convert = _mm_cvtpd_ps(a);
10452    transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
10453}
10454
10455/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10456///
10457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
10458#[inline]
10459#[target_feature(enable = "avx512f")]
10460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10461#[cfg_attr(test, assert_instr(vcvtpd2dq))]
10462pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
10463    transmute(vcvtpd2dq(
10464        a.as_f64x8(),
10465        i32x8::ZERO,
10466        0b11111111,
10467        _MM_FROUND_CUR_DIRECTION,
10468    ))
10469}
10470
10471/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10472///
10473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
10474#[inline]
10475#[target_feature(enable = "avx512f")]
10476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10477#[cfg_attr(test, assert_instr(vcvtpd2dq))]
10478pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
10479    transmute(vcvtpd2dq(
10480        a.as_f64x8(),
10481        src.as_i32x8(),
10482        k,
10483        _MM_FROUND_CUR_DIRECTION,
10484    ))
10485}
10486
10487/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10488///
10489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
10490#[inline]
10491#[target_feature(enable = "avx512f")]
10492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10493#[cfg_attr(test, assert_instr(vcvtpd2dq))]
10494pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
10495    transmute(vcvtpd2dq(
10496        a.as_f64x8(),
10497        i32x8::ZERO,
10498        k,
10499        _MM_FROUND_CUR_DIRECTION,
10500    ))
10501}
10502
10503/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10504///
10505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
10506#[inline]
10507#[target_feature(enable = "avx512f,avx512vl")]
10508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10509#[cfg_attr(test, assert_instr(vcvtpd2dq))]
10510pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
10511    let convert = _mm256_cvtpd_epi32(a);
10512    transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10513}
10514
10515/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10516///
10517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
10518#[inline]
10519#[target_feature(enable = "avx512f,avx512vl")]
10520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10521#[cfg_attr(test, assert_instr(vcvtpd2dq))]
10522pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
10523    let convert = _mm256_cvtpd_epi32(a);
10524    transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10525}
10526
10527/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10528///
10529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
10530#[inline]
10531#[target_feature(enable = "avx512f,avx512vl")]
10532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10533#[cfg_attr(test, assert_instr(vcvtpd2dq))]
10534pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
10535    let convert = _mm_cvtpd_epi32(a);
10536    transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10537}
10538
10539/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10540///
10541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
10542#[inline]
10543#[target_feature(enable = "avx512f,avx512vl")]
10544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10545#[cfg_attr(test, assert_instr(vcvtpd2dq))]
10546pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
10547    let convert = _mm_cvtpd_epi32(a);
10548    transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10549}
10550
10551/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10552///
10553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
10554#[inline]
10555#[target_feature(enable = "avx512f")]
10556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10557#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10558pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
10559    transmute(vcvtpd2udq(
10560        a.as_f64x8(),
10561        u32x8::ZERO,
10562        0b11111111,
10563        _MM_FROUND_CUR_DIRECTION,
10564    ))
10565}
10566
10567/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10568///
10569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
10570#[inline]
10571#[target_feature(enable = "avx512f")]
10572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10573#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10574pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
10575    transmute(vcvtpd2udq(
10576        a.as_f64x8(),
10577        src.as_u32x8(),
10578        k,
10579        _MM_FROUND_CUR_DIRECTION,
10580    ))
10581}
10582
10583/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10584///
10585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
10586#[inline]
10587#[target_feature(enable = "avx512f")]
10588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10589#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10590pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
10591    transmute(vcvtpd2udq(
10592        a.as_f64x8(),
10593        u32x8::ZERO,
10594        k,
10595        _MM_FROUND_CUR_DIRECTION,
10596    ))
10597}
10598
10599/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10600///
10601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
10602#[inline]
10603#[target_feature(enable = "avx512f,avx512vl")]
10604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10605#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10606pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
10607    transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111))
10608}
10609
10610/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10611///
10612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
10613#[inline]
10614#[target_feature(enable = "avx512f,avx512vl")]
10615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10616#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10617pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
10618    transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k))
10619}
10620
10621/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10622///
10623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
10624#[inline]
10625#[target_feature(enable = "avx512f,avx512vl")]
10626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10627#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10628pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
10629    transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k))
10630}
10631
10632/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10633///
10634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
10635#[inline]
10636#[target_feature(enable = "avx512f,avx512vl")]
10637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10638#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10639pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
10640    transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111))
10641}
10642
10643/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10644///
10645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
10646#[inline]
10647#[target_feature(enable = "avx512f,avx512vl")]
10648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10649#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10650pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
10651    transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k))
10652}
10653
10654/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10655///
10656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
10657#[inline]
10658#[target_feature(enable = "avx512f,avx512vl")]
10659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10660#[cfg_attr(test, assert_instr(vcvtpd2udq))]
10661pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
10662    transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k))
10663}
10664
10665/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
10666///
10667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
10668#[inline]
10669#[target_feature(enable = "avx512f")]
10670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10671#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10672pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
10673    let r: f32x8 = vcvtpd2ps(
10674        v2.as_f64x8(),
10675        f32x8::ZERO,
10676        0b11111111,
10677        _MM_FROUND_CUR_DIRECTION,
10678    );
10679    simd_shuffle!(
10680        r,
10681        f32x8::ZERO,
10682        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
10683    )
10684}
10685
10686/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
10687///
10688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
10689#[inline]
10690#[target_feature(enable = "avx512f")]
10691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10692#[cfg_attr(test, assert_instr(vcvtpd2ps))]
10693pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
10694    let r: f32x8 = vcvtpd2ps(
10695        v2.as_f64x8(),
10696        _mm512_castps512_ps256(src).as_f32x8(),
10697        k,
10698        _MM_FROUND_CUR_DIRECTION,
10699    );
10700    simd_shuffle!(
10701        r,
10702        f32x8::ZERO,
10703        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
10704    )
10705}
10706
10707/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10713#[cfg_attr(test, assert_instr(vpmovsxbd))]
10714pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
10715    let a = a.as_i8x16();
10716    transmute::<i32x16, _>(simd_cast(a))
10717}
10718
10719/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10720///
10721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
10722#[inline]
10723#[target_feature(enable = "avx512f")]
10724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10725#[cfg_attr(test, assert_instr(vpmovsxbd))]
10726pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
10727    let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
10728    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
10729}
10730
10731/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10732///
10733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
10734#[inline]
10735#[target_feature(enable = "avx512f")]
10736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10737#[cfg_attr(test, assert_instr(vpmovsxbd))]
10738pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
10739    let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
10740    transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
10741}
10742
10743/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10744///
10745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
10746#[inline]
10747#[target_feature(enable = "avx512f,avx512vl")]
10748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10749#[cfg_attr(test, assert_instr(vpmovsxbd))]
10750pub unsafe fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
10751    let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
10752    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
10753}
10754
10755/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10756///
10757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
10758#[inline]
10759#[target_feature(enable = "avx512f,avx512vl")]
10760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10761#[cfg_attr(test, assert_instr(vpmovsxbd))]
10762pub unsafe fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
10763    let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
10764    transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
10765}
10766
10767/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10768///
10769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
10770#[inline]
10771#[target_feature(enable = "avx512f,avx512vl")]
10772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10773#[cfg_attr(test, assert_instr(vpmovsxbd))]
10774pub unsafe fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10775    let convert = _mm_cvtepi8_epi32(a).as_i32x4();
10776    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
10777}
10778
10779/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10780///
10781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
10782#[inline]
10783#[target_feature(enable = "avx512f,avx512vl")]
10784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10785#[cfg_attr(test, assert_instr(vpmovsxbd))]
10786pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
10787    let convert = _mm_cvtepi8_epi32(a).as_i32x4();
10788    transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
10789}
10790
10791/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
10792///
10793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
10794#[inline]
10795#[target_feature(enable = "avx512f")]
10796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10797#[cfg_attr(test, assert_instr(vpmovsxbq))]
10798pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
10799    let a = a.as_i8x16();
10800    let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
10801    transmute::<i64x8, _>(simd_cast(v64))
10802}
10803
10804/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10805///
10806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
10807#[inline]
10808#[target_feature(enable = "avx512f")]
10809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10810#[cfg_attr(test, assert_instr(vpmovsxbq))]
10811pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
10812    let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
10813    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
10814}
10815
10816/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10817///
10818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
10819#[inline]
10820#[target_feature(enable = "avx512f")]
10821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10822#[cfg_attr(test, assert_instr(vpmovsxbq))]
10823pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
10824    let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
10825    transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
10826}
10827
10828/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10829///
10830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
10831#[inline]
10832#[target_feature(enable = "avx512f,avx512vl")]
10833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10834#[cfg_attr(test, assert_instr(vpmovsxbq))]
10835pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
10836    let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
10837    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
10838}
10839
10840/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10841///
10842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
10843#[inline]
10844#[target_feature(enable = "avx512f,avx512vl")]
10845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10846#[cfg_attr(test, assert_instr(vpmovsxbq))]
10847pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
10848    let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
10849    transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
10850}
10851
10852/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10853///
10854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
10855#[inline]
10856#[target_feature(enable = "avx512f,avx512vl")]
10857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10858#[cfg_attr(test, assert_instr(vpmovsxbq))]
10859pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10860    let convert = _mm_cvtepi8_epi64(a).as_i64x2();
10861    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
10862}
10863
10864/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10865///
10866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
10867#[inline]
10868#[target_feature(enable = "avx512f,avx512vl")]
10869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10870#[cfg_attr(test, assert_instr(vpmovsxbq))]
10871pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
10872    let convert = _mm_cvtepi8_epi64(a).as_i64x2();
10873    transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
10874}
10875
10876/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
10877///
10878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
10879#[inline]
10880#[target_feature(enable = "avx512f")]
10881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10882#[cfg_attr(test, assert_instr(vpmovzxbd))]
10883pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
10884    let a = a.as_u8x16();
10885    transmute::<i32x16, _>(simd_cast(a))
10886}
10887
10888/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
10891#[inline]
10892#[target_feature(enable = "avx512f")]
10893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10894#[cfg_attr(test, assert_instr(vpmovzxbd))]
10895pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
10896    let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
10897    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
10898}
10899
10900/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10901///
10902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
10903#[inline]
10904#[target_feature(enable = "avx512f")]
10905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10906#[cfg_attr(test, assert_instr(vpmovzxbd))]
10907pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
10908    let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
10909    transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
10910}
10911
10912/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10913///
10914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
10915#[inline]
10916#[target_feature(enable = "avx512f,avx512vl")]
10917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10918#[cfg_attr(test, assert_instr(vpmovzxbd))]
10919pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
10920    let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
10921    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
10922}
10923
10924/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10925///
10926/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
10927#[inline]
10928#[target_feature(enable = "avx512f,avx512vl")]
10929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10930#[cfg_attr(test, assert_instr(vpmovzxbd))]
10931pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
10932    let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
10933    transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
10934}
10935
10936/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10937///
10938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
10939#[inline]
10940#[target_feature(enable = "avx512f,avx512vl")]
10941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10942#[cfg_attr(test, assert_instr(vpmovzxbd))]
10943pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
10944    let convert = _mm_cvtepu8_epi32(a).as_i32x4();
10945    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
10946}
10947
10948/// Zero extend packed unsigned 8-bit integers in th elow 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10949///
10950/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617)
10951#[inline]
10952#[target_feature(enable = "avx512f,avx512vl")]
10953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10954#[cfg_attr(test, assert_instr(vpmovzxbd))]
10955pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
10956    let convert = _mm_cvtepu8_epi32(a).as_i32x4();
10957    transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
10958}
10959
10960/// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed 64-bit integers, and store the results in dst.
10961///
10962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
10963#[inline]
10964#[target_feature(enable = "avx512f")]
10965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10966#[cfg_attr(test, assert_instr(vpmovzxbq))]
10967pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
10968    let a = a.as_u8x16();
10969    let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
10970    transmute::<i64x8, _>(simd_cast(v64))
10971}
10972
10973/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10974///
10975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
10976#[inline]
10977#[target_feature(enable = "avx512f")]
10978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10979#[cfg_attr(test, assert_instr(vpmovzxbq))]
10980pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
10981    let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
10982    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
10983}
10984
10985/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10986///
10987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
10988#[inline]
10989#[target_feature(enable = "avx512f")]
10990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10991#[cfg_attr(test, assert_instr(vpmovzxbq))]
10992pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
10993    let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
10994    transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
10995}
10996
10997/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10998///
10999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11000#[inline]
11001#[target_feature(enable = "avx512f,avx512vl")]
11002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11003#[cfg_attr(test, assert_instr(vpmovzxbq))]
11004pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11005    let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11006    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11007}
11008
11009/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11010///
11011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11012#[inline]
11013#[target_feature(enable = "avx512f,avx512vl")]
11014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11015#[cfg_attr(test, assert_instr(vpmovzxbq))]
11016pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11017    let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
11018    transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11019}
11020
11021/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11022///
11023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11024#[inline]
11025#[target_feature(enable = "avx512f,avx512vl")]
11026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11027#[cfg_attr(test, assert_instr(vpmovzxbq))]
11028pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11029    let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11030    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11031}
11032
11033/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11034///
11035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11036#[inline]
11037#[target_feature(enable = "avx512f,avx512vl")]
11038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11039#[cfg_attr(test, assert_instr(vpmovzxbq))]
11040pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11041    let convert = _mm_cvtepu8_epi64(a).as_i64x2();
11042    transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11043}
11044
11045/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11046///
11047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
11048#[inline]
11049#[target_feature(enable = "avx512f")]
11050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11051#[cfg_attr(test, assert_instr(vpmovsxwd))]
11052pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11053    let a = a.as_i16x16();
11054    transmute::<i32x16, _>(simd_cast(a))
11055}
11056
11057/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11058///
11059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11060#[inline]
11061#[target_feature(enable = "avx512f")]
11062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11063#[cfg_attr(test, assert_instr(vpmovsxwd))]
11064pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11065    let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11066    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11067}
11068
11069/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11070///
11071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11072#[inline]
11073#[target_feature(enable = "avx512f")]
11074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11075#[cfg_attr(test, assert_instr(vpmovsxwd))]
11076pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11077    let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
11078    transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11079}
11080
11081/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11082///
11083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11084#[inline]
11085#[target_feature(enable = "avx512f,avx512vl")]
11086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11087#[cfg_attr(test, assert_instr(vpmovsxwd))]
11088pub unsafe fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11089    let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11090    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11091}
11092
11093/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11094///
11095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11096#[inline]
11097#[target_feature(enable = "avx512f,avx512vl")]
11098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11099#[cfg_attr(test, assert_instr(vpmovsxwd))]
11100pub unsafe fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11101    let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
11102    transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11103}
11104
11105/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11106///
11107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11108#[inline]
11109#[target_feature(enable = "avx512f,avx512vl")]
11110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11111#[cfg_attr(test, assert_instr(vpmovsxwd))]
11112pub unsafe fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11113    let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11114    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11115}
11116
11117/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11118///
11119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11120#[inline]
11121#[target_feature(enable = "avx512f,avx512vl")]
11122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11123#[cfg_attr(test, assert_instr(vpmovsxwd))]
11124pub unsafe fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11125    let convert = _mm_cvtepi16_epi32(a).as_i32x4();
11126    transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11127}
11128
11129/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
11130///
11131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
11132#[inline]
11133#[target_feature(enable = "avx512f")]
11134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11135#[cfg_attr(test, assert_instr(vpmovsxwq))]
11136pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
11137    let a = a.as_i16x8();
11138    transmute::<i64x8, _>(simd_cast(a))
11139}
11140
11141/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11142///
11143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
11144#[inline]
11145#[target_feature(enable = "avx512f")]
11146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11147#[cfg_attr(test, assert_instr(vpmovsxwq))]
11148pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11149    let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
11150    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11151}
11152
11153/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11154///
11155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
11156#[inline]
11157#[target_feature(enable = "avx512f")]
11158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11159#[cfg_attr(test, assert_instr(vpmovsxwq))]
11160pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
11161    let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
11162    transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11163}
11164
11165/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11166///
11167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
11168#[inline]
11169#[target_feature(enable = "avx512f,avx512vl")]
11170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11171#[cfg_attr(test, assert_instr(vpmovsxwq))]
11172pub unsafe fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11173    let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
11174    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11175}
11176
11177/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11178///
11179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
11180#[inline]
11181#[target_feature(enable = "avx512f,avx512vl")]
11182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11183#[cfg_attr(test, assert_instr(vpmovsxwq))]
11184pub unsafe fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
11185    let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
11186    transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11187}
11188
11189/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11190///
11191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
11192#[inline]
11193#[target_feature(enable = "avx512f,avx512vl")]
11194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11195#[cfg_attr(test, assert_instr(vpmovsxwq))]
11196pub unsafe fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11197    let convert = _mm_cvtepi16_epi64(a).as_i64x2();
11198    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11199}
11200
11201/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11202///
11203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
11204#[inline]
11205#[target_feature(enable = "avx512f,avx512vl")]
11206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11207#[cfg_attr(test, assert_instr(vpmovsxwq))]
11208pub unsafe fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
11209    let convert = _mm_cvtepi16_epi64(a).as_i64x2();
11210    transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11211}
11212
11213/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11214///
11215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
11216#[inline]
11217#[target_feature(enable = "avx512f")]
11218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11219#[cfg_attr(test, assert_instr(vpmovzxwd))]
11220pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
11221    let a = a.as_u16x16();
11222    transmute::<i32x16, _>(simd_cast(a))
11223}
11224
11225/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11226///
11227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
11228#[inline]
11229#[target_feature(enable = "avx512f")]
11230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11231#[cfg_attr(test, assert_instr(vpmovzxwd))]
11232pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11233    let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
11234    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11235}
11236
11237/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11238///
11239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
11240#[inline]
11241#[target_feature(enable = "avx512f")]
11242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11243#[cfg_attr(test, assert_instr(vpmovzxwd))]
11244pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11245    let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
11246    transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11247}
11248
11249/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11250///
11251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
11252#[inline]
11253#[target_feature(enable = "avx512f,avx512vl")]
11254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11255#[cfg_attr(test, assert_instr(vpmovzxwd))]
11256pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11257    let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
11258    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11259}
11260
11261/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11262///
11263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
11264#[inline]
11265#[target_feature(enable = "avx512f,avx512vl")]
11266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11267#[cfg_attr(test, assert_instr(vpmovzxwd))]
11268pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11269    let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
11270    transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11271}
11272
11273/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11274///
11275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
11276#[inline]
11277#[target_feature(enable = "avx512f,avx512vl")]
11278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11279#[cfg_attr(test, assert_instr(vpmovzxwd))]
11280pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11281    let convert = _mm_cvtepu16_epi32(a).as_i32x4();
11282    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11283}
11284
11285/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11286///
11287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
11288#[inline]
11289#[target_feature(enable = "avx512f,avx512vl")]
11290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11291#[cfg_attr(test, assert_instr(vpmovzxwd))]
11292pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11293    let convert = _mm_cvtepu16_epi32(a).as_i32x4();
11294    transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11295}
11296
11297/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
11298///
11299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
11300#[inline]
11301#[target_feature(enable = "avx512f")]
11302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11303#[cfg_attr(test, assert_instr(vpmovzxwq))]
11304pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
11305    let a = a.as_u16x8();
11306    transmute::<i64x8, _>(simd_cast(a))
11307}
11308
11309/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11310///
11311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
11312#[inline]
11313#[target_feature(enable = "avx512f")]
11314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11315#[cfg_attr(test, assert_instr(vpmovzxwq))]
11316pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11317    let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
11318    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11319}
11320
11321/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11322///
11323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
11324#[inline]
11325#[target_feature(enable = "avx512f")]
11326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11327#[cfg_attr(test, assert_instr(vpmovzxwq))]
11328pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
11329    let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
11330    transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11331}
11332
11333/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11334///
11335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
11336#[inline]
11337#[target_feature(enable = "avx512f,avx512vl")]
11338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11339#[cfg_attr(test, assert_instr(vpmovzxwq))]
11340pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11341    let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
11342    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11343}
11344
11345/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11346///
11347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
11348#[inline]
11349#[target_feature(enable = "avx512f,avx512vl")]
11350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11351#[cfg_attr(test, assert_instr(vpmovzxwq))]
11352pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
11353    let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
11354    transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11355}
11356
11357/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11358///
11359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
11360#[inline]
11361#[target_feature(enable = "avx512f,avx512vl")]
11362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11363#[cfg_attr(test, assert_instr(vpmovzxwq))]
11364pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11365    let convert = _mm_cvtepu16_epi64(a).as_i64x2();
11366    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11367}
11368
11369/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11370///
11371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
11372#[inline]
11373#[target_feature(enable = "avx512f,avx512vl")]
11374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11375#[cfg_attr(test, assert_instr(vpmovzxwq))]
11376pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
11377    let convert = _mm_cvtepu16_epi64(a).as_i64x2();
11378    transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11379}
11380
11381/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
11382///
11383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
11384#[inline]
11385#[target_feature(enable = "avx512f")]
11386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11387#[cfg_attr(test, assert_instr(vpmovsxdq))]
11388pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
11389    let a = a.as_i32x8();
11390    transmute::<i64x8, _>(simd_cast(a))
11391}
11392
11393/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11394///
11395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
11396#[inline]
11397#[target_feature(enable = "avx512f")]
11398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11399#[cfg_attr(test, assert_instr(vpmovsxdq))]
11400pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
11401    let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
11402    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11403}
11404
11405/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11406///
11407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
11408#[inline]
11409#[target_feature(enable = "avx512f")]
11410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11411#[cfg_attr(test, assert_instr(vpmovsxdq))]
11412pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
11413    let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
11414    transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11415}
11416
11417/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11418///
11419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
11420#[inline]
11421#[target_feature(enable = "avx512f,avx512vl")]
11422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11423#[cfg_attr(test, assert_instr(vpmovsxdq))]
11424pub unsafe fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11425    let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
11426    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11427}
11428
11429/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11430///
11431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
11432#[inline]
11433#[target_feature(enable = "avx512f,avx512vl")]
11434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11435#[cfg_attr(test, assert_instr(vpmovsxdq))]
11436pub unsafe fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
11437    let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
11438    transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11439}
11440
11441/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11442///
11443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
11444#[inline]
11445#[target_feature(enable = "avx512f,avx512vl")]
11446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11447#[cfg_attr(test, assert_instr(vpmovsxdq))]
11448pub unsafe fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11449    let convert = _mm_cvtepi32_epi64(a).as_i64x2();
11450    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11451}
11452
11453/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11454///
11455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
11456#[inline]
11457#[target_feature(enable = "avx512f,avx512vl")]
11458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11459#[cfg_attr(test, assert_instr(vpmovsxdq))]
11460pub unsafe fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
11461    let convert = _mm_cvtepi32_epi64(a).as_i64x2();
11462    transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11463}
11464
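// Illustrative sketch, not part of the original source: sign extension merged
// with `src` under a writemask. The helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cvtepi32_epi64() -> __m512i {
    let a = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7);
    let src = _mm512_set1_epi64(99);
    // Mask 0b0000_1111: lanes 0..=3 receive the sign-extended values of `a`
    // (0, -1, 2, -3); lanes 4..=7 are copied from `src` (all 99).
    _mm512_mask_cvtepi32_epi64(src, 0b0000_1111, a)
}
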
11465/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
11466///
11467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
11468#[inline]
11469#[target_feature(enable = "avx512f")]
11470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11471#[cfg_attr(test, assert_instr(vpmovzxdq))]
11472pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
11473    let a = a.as_u32x8();
11474    transmute::<i64x8, _>(simd_cast(a))
11475}
11476
11477/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11478///
11479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
11480#[inline]
11481#[target_feature(enable = "avx512f")]
11482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11483#[cfg_attr(test, assert_instr(vpmovzxdq))]
11484pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
11485    let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
11486    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11487}
11488
11489/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11495#[cfg_attr(test, assert_instr(vpmovzxdq))]
11496pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
11497    let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
11498    transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11499}
11500
11501/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11502///
11503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
11504#[inline]
11505#[target_feature(enable = "avx512f,avx512vl")]
11506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11507#[cfg_attr(test, assert_instr(vpmovzxdq))]
11508pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11509    let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
11510    transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11511}
11512
11513/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11514///
11515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
11516#[inline]
11517#[target_feature(enable = "avx512f,avx512vl")]
11518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11519#[cfg_attr(test, assert_instr(vpmovzxdq))]
11520pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
11521    let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
11522    transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11523}
11524
11525/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11526///
11527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
11528#[inline]
11529#[target_feature(enable = "avx512f,avx512vl")]
11530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11531#[cfg_attr(test, assert_instr(vpmovzxdq))]
11532pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11533    let convert = _mm_cvtepu32_epi64(a).as_i64x2();
11534    transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11535}
11536
11537/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11538///
11539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
11540#[inline]
11541#[target_feature(enable = "avx512f,avx512vl")]
11542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11543#[cfg_attr(test, assert_instr(vpmovzxdq))]
11544pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
11545    let convert = _mm_cvtepu32_epi64(a).as_i64x2();
11546    transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11547}
11548
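// Illustrative sketch, not part of the original source: contrasts sign and
// zero extension of the same input. Helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_extend_sign_vs_zero() -> (__m512i, __m512i) {
    // Every 32-bit lane is -1 when read as signed, 0xFFFF_FFFF when unsigned.
    let a = _mm256_set1_epi32(-1);
    // Sign extension keeps -1 in each 64-bit lane; zero extension yields
    // 0x0000_0000_FFFF_FFFF (4_294_967_295).
    (_mm512_cvtepi32_epi64(a), _mm512_cvtepu32_epi64(a))
}
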
11549/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11550///
11551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
11552#[inline]
11553#[target_feature(enable = "avx512f")]
11554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11555#[cfg_attr(test, assert_instr(vcvtdq2ps))]
11556pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
11557    let a = a.as_i32x16();
11558    transmute::<f32x16, _>(simd_cast(a))
11559}
11560
11561/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11562///
11563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
11564#[inline]
11565#[target_feature(enable = "avx512f")]
11566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11567#[cfg_attr(test, assert_instr(vcvtdq2ps))]
11568pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
11569    let convert = _mm512_cvtepi32_ps(a).as_f32x16();
11570    transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
11571}
11572
11573/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11574///
11575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
11576#[inline]
11577#[target_feature(enable = "avx512f")]
11578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11579#[cfg_attr(test, assert_instr(vcvtdq2ps))]
11580pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
11581    let convert = _mm512_cvtepi32_ps(a).as_f32x16();
11582    transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
11583}
11584
11585/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11586///
11587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
11588#[inline]
11589#[target_feature(enable = "avx512f,avx512vl")]
11590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11591#[cfg_attr(test, assert_instr(vcvtdq2ps))]
11592pub unsafe fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
11593    let convert = _mm256_cvtepi32_ps(a).as_f32x8();
11594    transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
11595}
11596
11597/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11598///
11599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
11600#[inline]
11601#[target_feature(enable = "avx512f,avx512vl")]
11602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11603#[cfg_attr(test, assert_instr(vcvtdq2ps))]
11604pub unsafe fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
11605    let convert = _mm256_cvtepi32_ps(a).as_f32x8();
11606    transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
11607}
11608
11609/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11610///
11611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
11612#[inline]
11613#[target_feature(enable = "avx512f,avx512vl")]
11614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11615#[cfg_attr(test, assert_instr(vcvtdq2ps))]
11616pub unsafe fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
11617    let convert = _mm_cvtepi32_ps(a).as_f32x4();
11618    transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
11619}
11620
11621/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11622///
11623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
11624#[inline]
11625#[target_feature(enable = "avx512f,avx512vl")]
11626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11627#[cfg_attr(test, assert_instr(vcvtdq2ps))]
11628pub unsafe fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
11629    let convert = _mm_cvtepi32_ps(a).as_f32x4();
11630    transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
11631}
11632
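// Illustrative sketch, not part of the original source: i32 -> f32 conversion
// under a zeromask. Helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_cvtepi32_ps() -> __m512 {
    let a = _mm512_set1_epi32(-7);
    // The low eight mask bits are set, so lanes 0..=7 hold -7.0 while lanes
    // 8..=15 are zeroed out.
    _mm512_maskz_cvtepi32_ps(0b0000_0000_1111_1111, a)
}
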
11633/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11634///
11635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
11636#[inline]
11637#[target_feature(enable = "avx512f")]
11638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11639#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11640pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
11641    let a = a.as_i32x8();
11642    transmute::<f64x8, _>(simd_cast(a))
11643}
11644
11645/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11646///
11647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
11648#[inline]
11649#[target_feature(enable = "avx512f")]
11650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11651#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11652pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
11653    let convert = _mm512_cvtepi32_pd(a).as_f64x8();
11654    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
11655}
11656
11657/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11658///
11659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
11660#[inline]
11661#[target_feature(enable = "avx512f")]
11662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11663#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11664pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
11665    let convert = _mm512_cvtepi32_pd(a).as_f64x8();
11666    transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
11667}
11668
11669/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11670///
11671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
11672#[inline]
11673#[target_feature(enable = "avx512f,avx512vl")]
11674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11675#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11676pub unsafe fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
11677    let convert = _mm256_cvtepi32_pd(a).as_f64x4();
11678    transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
11679}
11680
11681/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11687#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11688pub unsafe fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
11689    let convert = _mm256_cvtepi32_pd(a).as_f64x4();
11690    transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
11691}
11692
11693/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11694///
11695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
11696#[inline]
11697#[target_feature(enable = "avx512f,avx512vl")]
11698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11699#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11700pub unsafe fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
11701    let convert = _mm_cvtepi32_pd(a).as_f64x2();
11702    transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
11703}
11704
11705/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11706///
11707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
11708#[inline]
11709#[target_feature(enable = "avx512f,avx512vl")]
11710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11711#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11712pub unsafe fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
11713    let convert = _mm_cvtepi32_pd(a).as_f64x2();
11714    transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
11715}
11716
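// Illustrative sketch, not part of the original source: widening i32 -> f64
// conversion with a writemask. Helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cvtepi32_pd() -> __m512d {
    let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let src = _mm512_set1_pd(-1.0);
    // Odd mask bits set: odd lanes hold the converted values (1.0, 3.0, ...),
    // even lanes are copied from `src` (-1.0).
    _mm512_mask_cvtepi32_pd(src, 0b1010_1010, a)
}
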
11717/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11718///
11719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
11720#[inline]
11721#[target_feature(enable = "avx512f")]
11722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11723#[cfg_attr(test, assert_instr(vcvtudq2ps))]
11724pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
11725    let a = a.as_u32x16();
11726    transmute::<f32x16, _>(simd_cast(a))
11727}
11728
11729/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11730///
11731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
11732#[inline]
11733#[target_feature(enable = "avx512f")]
11734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11735#[cfg_attr(test, assert_instr(vcvtudq2ps))]
11736pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
11737    let convert = _mm512_cvtepu32_ps(a).as_f32x16();
11738    transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
11739}
11740
11741/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11742///
11743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
11744#[inline]
11745#[target_feature(enable = "avx512f")]
11746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11747#[cfg_attr(test, assert_instr(vcvtudq2ps))]
11748pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
11749    let convert = _mm512_cvtepu32_ps(a).as_f32x16();
11750    transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
11751}
11752
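// Illustrative sketch, not part of the original source: why the unsigned
// conversion matters when the top bit is set. Helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtepu32_ps() -> (__m512, __m512) {
    // Each lane is 4_294_967_295 as unsigned, -1 as signed.
    let a = _mm512_set1_epi32(-1);
    // The unsigned conversion yields roughly 4.2949673e9 per lane; the signed
    // conversion yields -1.0.
    (_mm512_cvtepu32_ps(a), _mm512_cvtepi32_ps(a))
}
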
11753/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11754///
11755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
11756#[inline]
11757#[target_feature(enable = "avx512f")]
11758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11759#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11760pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
11761    let a = a.as_u32x8();
11762    transmute::<f64x8, _>(simd_cast(a))
11763}
11764
11765/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
11768#[inline]
11769#[target_feature(enable = "avx512f")]
11770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11771#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11772pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
11773    let convert = _mm512_cvtepu32_pd(a).as_f64x8();
11774    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
11775}
11776
11777/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11778///
11779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
11780#[inline]
11781#[target_feature(enable = "avx512f")]
11782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11783#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11784pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
11785    let convert = _mm512_cvtepu32_pd(a).as_f64x8();
11786    transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
11787}
11788
11789/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11790///
11791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
11792#[inline]
11793#[target_feature(enable = "avx512f,avx512vl")]
11794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11795#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11796pub unsafe fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
11797    let a = a.as_u32x4();
11798    transmute::<f64x4, _>(simd_cast(a))
11799}
11800
11801/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11802///
11803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
11804#[inline]
11805#[target_feature(enable = "avx512f,avx512vl")]
11806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11807#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11808pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
11809    let convert = _mm256_cvtepu32_pd(a).as_f64x4();
11810    transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
11811}
11812
11813/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11814///
11815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
11816#[inline]
11817#[target_feature(enable = "avx512f,avx512vl")]
11818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11819#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11820pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
11821    let convert = _mm256_cvtepu32_pd(a).as_f64x4();
11822    transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
11823}
11824
11825/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11826///
11827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
11828#[inline]
11829#[target_feature(enable = "avx512f,avx512vl")]
11830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11831#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11832pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
11833    let a = a.as_u32x4();
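    // Only the low two 32-bit elements of `a` are converted; the upper two are ignored.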
11834    let lo: u32x2 = simd_shuffle!(a, a, [0, 1]);
11835    transmute::<f64x2, _>(simd_cast(lo))
11836}
11837
11838/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11839///
11840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
11841#[inline]
11842#[target_feature(enable = "avx512f,avx512vl")]
11843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11844#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11845pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
11846    let convert = _mm_cvtepu32_pd(a).as_f64x2();
11847    transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
11848}
11849
11850/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11856#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11857pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
11858    let convert = _mm_cvtepu32_pd(a).as_f64x2();
11859    transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
11860}
11861
11862/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11863///
11864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
11865#[inline]
11866#[target_feature(enable = "avx512f")]
11867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11868#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11869pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
11870    let v2 = v2.as_i32x16();
11871    let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
11872    transmute::<f64x8, _>(simd_cast(v256))
11873}
11874
11875/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11876///
11877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
11878#[inline]
11879#[target_feature(enable = "avx512f")]
11880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11881#[cfg_attr(test, assert_instr(vcvtdq2pd))]
11882pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
11883    let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
11884    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
11885}
11886
11887/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11888///
11889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
11890#[inline]
11891#[target_feature(enable = "avx512f")]
11892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11893#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11894pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
11895    let v2 = v2.as_u32x16();
11896    let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
11897    transmute::<f64x8, _>(simd_cast(v256))
11898}
11899
11900/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11901///
11902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
11903#[inline]
11904#[target_feature(enable = "avx512f")]
11905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11906#[cfg_attr(test, assert_instr(vcvtudq2pd))]
11907pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
11908    let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
11909    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
11910}
11911
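// Illustrative sketch, not part of the original source: the `lo` conversions
// read only the lower eight 32-bit elements of the 512-bit input. Helper name
// and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtepi32lo_pd() -> __m512d {
    let v2 = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, -9, -9, -9, -9, -9, -9, -9, -9);
    // Elements 0..=7 become 0.0..=7.0; the upper eight elements (-9) do not
    // affect the result.
    _mm512_cvtepi32lo_pd(v2)
}
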
11912/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
11913///
11914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
11915#[inline]
11916#[target_feature(enable = "avx512f")]
11917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11918#[cfg_attr(test, assert_instr(vpmovdw))]
11919pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
11920    let a = a.as_i32x16();
11921    transmute::<i16x16, _>(simd_cast(a))
11922}
11923
11924/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11925///
11926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
11927#[inline]
11928#[target_feature(enable = "avx512f")]
11929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11930#[cfg_attr(test, assert_instr(vpmovdw))]
11931pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
11932    let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
11933    transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
11934}
11935
11936/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11937///
11938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
11939#[inline]
11940#[target_feature(enable = "avx512f")]
11941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11942#[cfg_attr(test, assert_instr(vpmovdw))]
11943pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
11944    let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
11945    transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
11946}
11947
11948/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11954#[cfg_attr(test, assert_instr(vpmovdw))]
11955pub unsafe fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
11956    let a = a.as_i32x8();
11957    transmute::<i16x8, _>(simd_cast(a))
11958}
11959
11960/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11961///
11962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
11963#[inline]
11964#[target_feature(enable = "avx512f,avx512vl")]
11965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11966#[cfg_attr(test, assert_instr(vpmovdw))]
11967pub unsafe fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
11968    let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
11969    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
11970}
11971
11972/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11973///
11974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
11975#[inline]
11976#[target_feature(enable = "avx512f,avx512vl")]
11977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11978#[cfg_attr(test, assert_instr(vpmovdw))]
11979pub unsafe fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
11980    let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
11981    transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
11982}
11983
11984/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
11985///
11986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
11987#[inline]
11988#[target_feature(enable = "avx512f,avx512vl")]
11989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11990#[cfg_attr(test, assert_instr(vpmovdw))]
11991pub unsafe fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
11992    transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111))
11993}
11994
11995/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11996///
11997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
11998#[inline]
11999#[target_feature(enable = "avx512f,avx512vl")]
12000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12001#[cfg_attr(test, assert_instr(vpmovdw))]
12002pub unsafe fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12003    transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k))
12004}
12005
12006/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12007///
12008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
12009#[inline]
12010#[target_feature(enable = "avx512f,avx512vl")]
12011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12012#[cfg_attr(test, assert_instr(vpmovdw))]
12013pub unsafe fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
12014    transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k))
12015}
12016
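// Illustrative sketch, not part of the original source: the downward
// conversions truncate rather than saturate. Helper name and values are
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cvtepi32_epi16_truncation() -> __m128i {
    // 0x0001_0002 does not fit in 16 bits; truncation keeps only the low word,
    // so the corresponding output lane is 0x0002 rather than a saturated value.
    let a = _mm_setr_epi32(0x0001_0002, -1, 3, 4);
    // The four 16-bit results occupy the low 64 bits of the return value; the
    // upper 64 bits are zeroed.
    _mm_cvtepi32_epi16(a)
}
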
12017/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12018///
12019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
12020#[inline]
12021#[target_feature(enable = "avx512f")]
12022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12023#[cfg_attr(test, assert_instr(vpmovdb))]
12024pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
12025    let a = a.as_i32x16();
12026    transmute::<i8x16, _>(simd_cast(a))
12027}
12028
12029/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12030///
12031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
12032#[inline]
12033#[target_feature(enable = "avx512f")]
12034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12035#[cfg_attr(test, assert_instr(vpmovdb))]
12036pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
12037    let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
12038    transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
12039}
12040
12041/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12042///
12043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
12044#[inline]
12045#[target_feature(enable = "avx512f")]
12046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12047#[cfg_attr(test, assert_instr(vpmovdb))]
12048pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
12049    let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
12050    transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
12051}
12052
12053/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12054///
12055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
12056#[inline]
12057#[target_feature(enable = "avx512f,avx512vl")]
12058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12059#[cfg_attr(test, assert_instr(vpmovdb))]
12060pub unsafe fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
12061    transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111))
12062}
12063
12064/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12065///
12066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
12067#[inline]
12068#[target_feature(enable = "avx512f,avx512vl")]
12069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12070#[cfg_attr(test, assert_instr(vpmovdb))]
12071pub unsafe fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12072    transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k))
12073}
12074
12075/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12076///
12077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
12078#[inline]
12079#[target_feature(enable = "avx512f,avx512vl")]
12080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12081#[cfg_attr(test, assert_instr(vpmovdb))]
12082pub unsafe fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
12083    transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k))
12084}
12085
12086/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12087///
12088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
12089#[inline]
12090#[target_feature(enable = "avx512f,avx512vl")]
12091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12092#[cfg_attr(test, assert_instr(vpmovdb))]
12093pub unsafe fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
12094    transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111))
12095}
12096
12097/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12098///
12099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
12100#[inline]
12101#[target_feature(enable = "avx512f,avx512vl")]
12102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12103#[cfg_attr(test, assert_instr(vpmovdb))]
12104pub unsafe fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12105    transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k))
12106}
12107
12108/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12109///
12110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
12111#[inline]
12112#[target_feature(enable = "avx512f,avx512vl")]
12113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12114#[cfg_attr(test, assert_instr(vpmovdb))]
12115pub unsafe fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
12116    transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k))
12117}
12118
12119/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
12120///
12121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
12122#[inline]
12123#[target_feature(enable = "avx512f")]
12124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12125#[cfg_attr(test, assert_instr(vpmovqd))]
12126pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
12127    let a = a.as_i64x8();
12128    transmute::<i32x8, _>(simd_cast(a))
12129}
12130
12131/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12132///
12133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
12134#[inline]
12135#[target_feature(enable = "avx512f")]
12136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12137#[cfg_attr(test, assert_instr(vpmovqd))]
12138pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
12139    let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
12140    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12141}
12142
12143/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12144///
12145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
12146#[inline]
12147#[target_feature(enable = "avx512f")]
12148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12149#[cfg_attr(test, assert_instr(vpmovqd))]
12150pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
12151    let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
12152    transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12153}
12154
12155/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
12156///
12157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
12158#[inline]
12159#[target_feature(enable = "avx512f,avx512vl")]
12160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12161#[cfg_attr(test, assert_instr(vpmovqd))]
12162pub unsafe fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
12163    let a = a.as_i64x4();
12164    transmute::<i32x4, _>(simd_cast(a))
12165}
12166
12167/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12168///
12169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
12170#[inline]
12171#[target_feature(enable = "avx512f,avx512vl")]
12172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12173#[cfg_attr(test, assert_instr(vpmovqd))]
12174pub unsafe fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12175    let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
12176    transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12177}
12178
12179/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12180///
12181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
12182#[inline]
12183#[target_feature(enable = "avx512f,avx512vl")]
12184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12185#[cfg_attr(test, assert_instr(vpmovqd))]
12186pub unsafe fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
12187    let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
12188    transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12189}
12190
12191/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
12192///
12193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
12194#[inline]
12195#[target_feature(enable = "avx512f,avx512vl")]
12196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12197#[cfg_attr(test, assert_instr(vpmovqd))]
12198pub unsafe fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
12199    transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111))
12200}
12201
12202/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12203///
12204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
12205#[inline]
12206#[target_feature(enable = "avx512f,avx512vl")]
12207#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12208#[cfg_attr(test, assert_instr(vpmovqd))]
12209pub unsafe fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12210    transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k))
12211}
12212
12213/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12214///
12215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
12216#[inline]
12217#[target_feature(enable = "avx512f,avx512vl")]
12218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12219#[cfg_attr(test, assert_instr(vpmovqd))]
12220pub unsafe fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
12221    transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k))
12222}
12223
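// Illustrative sketch, not part of the original source: narrowing i64 -> i32
// with a writemask on the 512-bit variant. Helper name and values are
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cvtepi64_epi32() -> __m256i {
    // 0x1_0000_0005 truncates to 5 when narrowed to 32 bits.
    let a = _mm512_set1_epi64(0x1_0000_0005);
    let src = _mm256_set1_epi32(-1);
    // Mask 0b0000_0011: only lanes 0 and 1 are written with the truncated
    // value; lanes 2..=7 are copied from `src`.
    _mm512_mask_cvtepi64_epi32(src, 0b0000_0011, a)
}
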
12224/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12225///
12226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
12227#[inline]
12228#[target_feature(enable = "avx512f")]
12229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12230#[cfg_attr(test, assert_instr(vpmovqw))]
12231pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
12232    let a = a.as_i64x8();
12233    transmute::<i16x8, _>(simd_cast(a))
12234}
12235
12236/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12237///
12238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
12239#[inline]
12240#[target_feature(enable = "avx512f")]
12241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12242#[cfg_attr(test, assert_instr(vpmovqw))]
12243pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
12244    let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
12245    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12246}
12247
12248/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12249///
12250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
12251#[inline]
12252#[target_feature(enable = "avx512f")]
12253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12254#[cfg_attr(test, assert_instr(vpmovqw))]
12255pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
12256    let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
12257    transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12258}
12259
12260/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12261///
12262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
12263#[inline]
12264#[target_feature(enable = "avx512f,avx512vl")]
12265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12266#[cfg_attr(test, assert_instr(vpmovqw))]
12267pub unsafe fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
12268    transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111))
12269}
12270
12271/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12272///
12273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
12274#[inline]
12275#[target_feature(enable = "avx512f,avx512vl")]
12276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12277#[cfg_attr(test, assert_instr(vpmovqw))]
12278pub unsafe fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12279    transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k))
12280}
12281
12282/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12283///
12284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
12285#[inline]
12286#[target_feature(enable = "avx512f,avx512vl")]
12287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12288#[cfg_attr(test, assert_instr(vpmovqw))]
12289pub unsafe fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
12290    transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k))
12291}
12292
12293/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12294///
12295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
12296#[inline]
12297#[target_feature(enable = "avx512f,avx512vl")]
12298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12299#[cfg_attr(test, assert_instr(vpmovqw))]
12300pub unsafe fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
12301    transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111))
12302}
12303
12304/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12305///
12306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
12307#[inline]
12308#[target_feature(enable = "avx512f,avx512vl")]
12309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12310#[cfg_attr(test, assert_instr(vpmovqw))]
12311pub unsafe fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12312    transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k))
12313}
12314
12315/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12316///
12317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
12318#[inline]
12319#[target_feature(enable = "avx512f,avx512vl")]
12320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12321#[cfg_attr(test, assert_instr(vpmovqw))]
12322pub unsafe fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
12323    transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k))
12324}
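
// Illustrative sketch, not part of the upstream file: contrasts the writemask and
// zeromask forms on the 128-bit variant. Values and mask are arbitrary; per Intel's
// pseudocode, only the two low 16-bit lanes correspond to source elements and the
// remaining lanes of the result are zeroed in both forms.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_vs_maskz_cvtepi64_epi16() -> (__m128i, __m128i) {
    let a = _mm_set_epi64x(7, -8);
    let src = _mm_set1_epi16(99);
    // Writemask: lane 0 becomes -8 (truncated), lane 1 keeps 99 from `src`.
    let masked = _mm_mask_cvtepi64_epi16(src, 0b01, a);
    // Zeromask: lane 0 becomes -8, every other lane is zeroed.
    let zeroed = _mm_maskz_cvtepi64_epi16(0b01, a);
    (masked, zeroed)
}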
12325
12326/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12332#[cfg_attr(test, assert_instr(vpmovqb))]
12333pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
12334    transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111))
12335}
12336
12337/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12338///
12339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
12340#[inline]
12341#[target_feature(enable = "avx512f")]
12342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12343#[cfg_attr(test, assert_instr(vpmovqb))]
12344pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
12345    transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k))
12346}
12347
12348/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12349///
12350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
12351#[inline]
12352#[target_feature(enable = "avx512f")]
12353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12354#[cfg_attr(test, assert_instr(vpmovqb))]
12355pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
12356    transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k))
12357}
12358
12359/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12360///
12361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
12362#[inline]
12363#[target_feature(enable = "avx512f,avx512vl")]
12364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12365#[cfg_attr(test, assert_instr(vpmovqb))]
12366pub unsafe fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
12367    transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111))
12368}
12369
12370/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12371///
12372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
12373#[inline]
12374#[target_feature(enable = "avx512f,avx512vl")]
12375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12376#[cfg_attr(test, assert_instr(vpmovqb))]
12377pub unsafe fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12378    transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k))
12379}
12380
12381/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12382///
12383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
12384#[inline]
12385#[target_feature(enable = "avx512f,avx512vl")]
12386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12387#[cfg_attr(test, assert_instr(vpmovqb))]
12388pub unsafe fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
12389    transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k))
12390}
12391
12392/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
12393///
12394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
12395#[inline]
12396#[target_feature(enable = "avx512f,avx512vl")]
12397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12398#[cfg_attr(test, assert_instr(vpmovqb))]
12399pub unsafe fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
12400    transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111))
12401}
12402
12403/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12404///
12405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
12406#[inline]
12407#[target_feature(enable = "avx512f,avx512vl")]
12408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12409#[cfg_attr(test, assert_instr(vpmovqb))]
12410pub unsafe fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12411    transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k))
12412}
12413
12414/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12415///
12416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
12417#[inline]
12418#[target_feature(enable = "avx512f,avx512vl")]
12419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12420#[cfg_attr(test, assert_instr(vpmovqb))]
12421pub unsafe fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
12422    transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k))
12423}
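
// Illustrative sketch, not part of the upstream file: the 512-bit form packs eight
// truncated bytes into the low half of the __m128i result, leaving the upper eight
// bytes zero. Values are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtepi64_epi8_truncation() -> __m128i {
    let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 0x102, -1);
    // Bytes 0..=7, low to high: -1, 2 (0x102 truncated), 3, 4, 5, 6, 7, 8; bytes 8..=15: 0.
    _mm512_cvtepi64_epi8(a)
}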
12424
12425/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
12426///
12427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
12428#[inline]
12429#[target_feature(enable = "avx512f")]
12430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12431#[cfg_attr(test, assert_instr(vpmovsdw))]
12432pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
12433    transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111))
12434}
12435
12436/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12437///
12438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
12439#[inline]
12440#[target_feature(enable = "avx512f")]
12441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12442#[cfg_attr(test, assert_instr(vpmovsdw))]
12443pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12444    transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k))
12445}
12446
12447/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12448///
12449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
12450#[inline]
12451#[target_feature(enable = "avx512f")]
12452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12453#[cfg_attr(test, assert_instr(vpmovsdw))]
12454pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12455    transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k))
12456}
12457
12458/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
12459///
12460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
12461#[inline]
12462#[target_feature(enable = "avx512f,avx512vl")]
12463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12464#[cfg_attr(test, assert_instr(vpmovsdw))]
12465pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
12466    transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111))
12467}
12468
12469/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12470///
12471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
12472#[inline]
12473#[target_feature(enable = "avx512f,avx512vl")]
12474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12475#[cfg_attr(test, assert_instr(vpmovsdw))]
12476pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12477    transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k))
12478}
12479
12480/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12486#[cfg_attr(test, assert_instr(vpmovsdw))]
12487pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12488    transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k))
12489}
12490
12491/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
12492///
12493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
12494#[inline]
12495#[target_feature(enable = "avx512f,avx512vl")]
12496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12497#[cfg_attr(test, assert_instr(vpmovsdw))]
12498pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
12499    transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111))
12500}
12501
12502/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12503///
12504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
12505#[inline]
12506#[target_feature(enable = "avx512f,avx512vl")]
12507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12508#[cfg_attr(test, assert_instr(vpmovsdw))]
12509pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12510    transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k))
12511}
12512
12513/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12514///
12515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
12516#[inline]
12517#[target_feature(enable = "avx512f,avx512vl")]
12518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12519#[cfg_attr(test, assert_instr(vpmovsdw))]
12520pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
12521    transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k))
12522}
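
// Illustrative sketch, not part of the upstream file: unlike the truncating
// conversions above, signed saturation clamps out-of-range values to i16::MIN /
// i16::MAX instead of discarding high bits. Values are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cvtsepi32_epi16_saturation() -> __m128i {
    let a = _mm_set_epi32(70_000, -70_000, 123, -123);
    // Lanes, low to high: -123, 123, i16::MIN (-32768), i16::MAX (32767), then four zeroed lanes.
    _mm_cvtsepi32_epi16(a)
}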
12523
12524/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
12525///
12526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
12527#[inline]
12528#[target_feature(enable = "avx512f")]
12529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12530#[cfg_attr(test, assert_instr(vpmovsdb))]
12531pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
12532    transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111))
12533}
12534
12535/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12536///
12537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
12538#[inline]
12539#[target_feature(enable = "avx512f")]
12540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12541#[cfg_attr(test, assert_instr(vpmovsdb))]
12542pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
12543    transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k))
12544}
12545
12546/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12547///
12548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
12549#[inline]
12550#[target_feature(enable = "avx512f")]
12551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12552#[cfg_attr(test, assert_instr(vpmovsdb))]
12553pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
12554    transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k))
12555}
12556
12557/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
12558///
12559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
12560#[inline]
12561#[target_feature(enable = "avx512f,avx512vl")]
12562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12563#[cfg_attr(test, assert_instr(vpmovsdb))]
12564pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
12565    transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111))
12566}
12567
12568/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12569///
12570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
12571#[inline]
12572#[target_feature(enable = "avx512f,avx512vl")]
12573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12574#[cfg_attr(test, assert_instr(vpmovsdb))]
12575pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12576    transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k))
12577}
12578
12579/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12580///
12581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
12582#[inline]
12583#[target_feature(enable = "avx512f,avx512vl")]
12584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12585#[cfg_attr(test, assert_instr(vpmovsdb))]
12586pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
12587    transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k))
12588}
12589
12590/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
12591///
12592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
12593#[inline]
12594#[target_feature(enable = "avx512f,avx512vl")]
12595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12596#[cfg_attr(test, assert_instr(vpmovsdb))]
12597pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
12598    transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111))
12599}
12600
12601/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12602///
12603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
12604#[inline]
12605#[target_feature(enable = "avx512f,avx512vl")]
12606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12607#[cfg_attr(test, assert_instr(vpmovsdb))]
12608pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12609    transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k))
12610}
12611
12612/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12613///
12614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
12615#[inline]
12616#[target_feature(enable = "avx512f,avx512vl")]
12617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12618#[cfg_attr(test, assert_instr(vpmovsdb))]
12619pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
12620    transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k))
12621}
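
// Illustrative sketch, not part of the upstream file: the 512-bit form narrows
// sixteen 32-bit lanes down to sixteen saturated bytes, filling the whole __m128i
// result. Values are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtsepi32_epi8_saturation() -> __m128i {
    // 300 saturates to i8::MAX (127) and -300 to i8::MIN (-128); the rest fit in a byte.
    let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 300, -300);
    _mm512_cvtsepi32_epi8(a)
}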
12622
12623/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
12624///
12625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
12626#[inline]
12627#[target_feature(enable = "avx512f")]
12628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12629#[cfg_attr(test, assert_instr(vpmovsqd))]
12630pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
12631    transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111))
12632}
12633
12634/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
12637#[inline]
12638#[target_feature(enable = "avx512f")]
12639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12640#[cfg_attr(test, assert_instr(vpmovsqd))]
12641pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
12642    transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k))
12643}
12644
12645/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12646///
12647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
12648#[inline]
12649#[target_feature(enable = "avx512f")]
12650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12651#[cfg_attr(test, assert_instr(vpmovsqd))]
12652pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
12653    transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k))
12654}
12655
12656/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
12657///
12658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
12659#[inline]
12660#[target_feature(enable = "avx512f,avx512vl")]
12661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12662#[cfg_attr(test, assert_instr(vpmovsqd))]
12663pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
12664    transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111))
12665}
12666
12667/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12668///
12669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
12670#[inline]
12671#[target_feature(enable = "avx512f,avx512vl")]
12672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12673#[cfg_attr(test, assert_instr(vpmovsqd))]
12674pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12675    transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k))
12676}
12677
12678/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12679///
12680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
12681#[inline]
12682#[target_feature(enable = "avx512f,avx512vl")]
12683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12684#[cfg_attr(test, assert_instr(vpmovsqd))]
12685pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
12686    transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k))
12687}
12688
12689/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
12690///
12691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
12692#[inline]
12693#[target_feature(enable = "avx512f,avx512vl")]
12694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12695#[cfg_attr(test, assert_instr(vpmovsqd))]
12696pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
12697    transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111))
12698}
12699
12700/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12701///
12702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
12703#[inline]
12704#[target_feature(enable = "avx512f,avx512vl")]
12705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12706#[cfg_attr(test, assert_instr(vpmovsqd))]
12707pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12708    transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k))
12709}
12710
12711/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12712///
12713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
12714#[inline]
12715#[target_feature(enable = "avx512f,avx512vl")]
12716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12717#[cfg_attr(test, assert_instr(vpmovsqd))]
12718pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
12719    transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k))
12720}
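
// Illustrative sketch, not part of the upstream file: 64-bit lanes outside the
// i32 range clamp to i32::MIN / i32::MAX under signed saturation. Values are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtsepi64_epi32_saturation() -> __m256i {
    let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, i64::MAX, i64::MIN);
    // Lanes, low to high: i32::MIN, i32::MAX, 2, 3, 4, 5, 6, 7.
    _mm512_cvtsepi64_epi32(a)
}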
12721
12722/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
12723///
12724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
12725#[inline]
12726#[target_feature(enable = "avx512f")]
12727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12728#[cfg_attr(test, assert_instr(vpmovsqw))]
12729pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
12730    transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111))
12731}
12732
12733/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12734///
12735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
12736#[inline]
12737#[target_feature(enable = "avx512f")]
12738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12739#[cfg_attr(test, assert_instr(vpmovsqw))]
12740pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
12741    transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k))
12742}
12743
12744/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12745///
12746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
12747#[inline]
12748#[target_feature(enable = "avx512f")]
12749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12750#[cfg_attr(test, assert_instr(vpmovsqw))]
12751pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
12752    transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k))
12753}
12754
12755/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
12756///
12757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
12758#[inline]
12759#[target_feature(enable = "avx512f,avx512vl")]
12760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12761#[cfg_attr(test, assert_instr(vpmovsqw))]
12762pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
12763    transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111))
12764}
12765
12766/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12767///
12768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
12769#[inline]
12770#[target_feature(enable = "avx512f,avx512vl")]
12771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12772#[cfg_attr(test, assert_instr(vpmovsqw))]
12773pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12774    transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k))
12775}
12776
12777/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12778///
12779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
12780#[inline]
12781#[target_feature(enable = "avx512f,avx512vl")]
12782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12783#[cfg_attr(test, assert_instr(vpmovsqw))]
12784pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
12785    transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k))
12786}
12787
12788/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12794#[cfg_attr(test, assert_instr(vpmovsqw))]
12795pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
12796    transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111))
12797}
12798
12799/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12800///
12801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
12802#[inline]
12803#[target_feature(enable = "avx512f,avx512vl")]
12804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12805#[cfg_attr(test, assert_instr(vpmovsqw))]
12806pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12807    transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k))
12808}
12809
12810/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12811///
12812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
12813#[inline]
12814#[target_feature(enable = "avx512f,avx512vl")]
12815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12816#[cfg_attr(test, assert_instr(vpmovsqw))]
12817pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
12818    transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k))
12819}
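
// Illustrative sketch, not part of the upstream file: the writemask form keeps the
// corresponding `src` element in lanes whose mask bit is clear. Values and mask are
// arbitrary; mask 0b0000_0101 selects lanes 0 and 2 of the eight converted lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cvtsepi64_epi16() -> __m128i {
    let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 100_000);
    let src = _mm_set1_epi16(-1);
    // Lane 0 becomes i16::MAX (100_000 saturated), lane 2 becomes 3; all other lanes keep -1 from `src`.
    _mm512_mask_cvtsepi64_epi16(src, 0b0000_0101, a)
}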
12820
12821/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
12822///
12823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
12824#[inline]
12825#[target_feature(enable = "avx512f")]
12826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12827#[cfg_attr(test, assert_instr(vpmovsqb))]
12828pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
12829    transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111))
12830}
12831
12832/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12833///
12834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
12835#[inline]
12836#[target_feature(enable = "avx512f")]
12837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12838#[cfg_attr(test, assert_instr(vpmovsqb))]
12839pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
12840    transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k))
12841}
12842
12843/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12844///
12845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
12846#[inline]
12847#[target_feature(enable = "avx512f")]
12848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12849#[cfg_attr(test, assert_instr(vpmovsqb))]
12850pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
12851    transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k))
12852}
12853
12854/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
12855///
12856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
12857#[inline]
12858#[target_feature(enable = "avx512f,avx512vl")]
12859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12860#[cfg_attr(test, assert_instr(vpmovsqb))]
12861pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
12862    transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111))
12863}
12864
12865/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12866///
12867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
12868#[inline]
12869#[target_feature(enable = "avx512f,avx512vl")]
12870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12871#[cfg_attr(test, assert_instr(vpmovsqb))]
12872pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12873    transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k))
12874}
12875
12876/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12877///
12878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
12879#[inline]
12880#[target_feature(enable = "avx512f,avx512vl")]
12881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12882#[cfg_attr(test, assert_instr(vpmovsqb))]
12883pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
12884    transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k))
12885}
12886
12887/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
12888///
12889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
12890#[inline]
12891#[target_feature(enable = "avx512f,avx512vl")]
12892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12893#[cfg_attr(test, assert_instr(vpmovsqb))]
12894pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
12895    transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111))
12896}
12897
12898/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12899///
12900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
12901#[inline]
12902#[target_feature(enable = "avx512f,avx512vl")]
12903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12904#[cfg_attr(test, assert_instr(vpmovsqb))]
12905pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12906    transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k))
12907}
12908
12909/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12910///
12911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
12912#[inline]
12913#[target_feature(enable = "avx512f,avx512vl")]
12914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12915#[cfg_attr(test, assert_instr(vpmovsqb))]
12916pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
12917    transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k))
12918}
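
// Illustrative sketch, not part of the upstream file: the zeromask form zeroes the
// lanes whose mask bit is clear instead of keeping `src`. Values and mask are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_cvtsepi64_epi8() -> __m128i {
    let a = _mm512_set1_epi64(-1000);
    // Bytes 0 and 1 become i8::MIN (-128, saturated); bytes 2..=7 are zeroed by the
    // mask, and bytes 8..=15 are always zero for the 512-bit form.
    _mm512_maskz_cvtsepi64_epi8(0b0000_0011, a)
}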
12919
12920/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
12921///
12922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
12923#[inline]
12924#[target_feature(enable = "avx512f")]
12925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12926#[cfg_attr(test, assert_instr(vpmovusdw))]
12927pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
12928    transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111))
12929}
12930
12931/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12937#[cfg_attr(test, assert_instr(vpmovusdw))]
12938pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939    transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k))
12940}
12941
12942/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12943///
12944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
12945#[inline]
12946#[target_feature(enable = "avx512f")]
12947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12948#[cfg_attr(test, assert_instr(vpmovusdw))]
12949pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12950    transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k))
12951}
12952
12953/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
12954///
12955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
12956#[inline]
12957#[target_feature(enable = "avx512f,avx512vl")]
12958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12959#[cfg_attr(test, assert_instr(vpmovusdw))]
12960pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
12961    transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111))
12962}
12963
12964/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12965///
12966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
12967#[inline]
12968#[target_feature(enable = "avx512f,avx512vl")]
12969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12970#[cfg_attr(test, assert_instr(vpmovusdw))]
12971pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12972    transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k))
12973}
12974
12975/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12976///
12977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
12978#[inline]
12979#[target_feature(enable = "avx512f,avx512vl")]
12980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12981#[cfg_attr(test, assert_instr(vpmovusdw))]
12982pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12983    transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k))
12984}
12985
12986/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
12987///
12988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
12989#[inline]
12990#[target_feature(enable = "avx512f,avx512vl")]
12991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12992#[cfg_attr(test, assert_instr(vpmovusdw))]
12993pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
12994    transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111))
12995}
12996
12997/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12998///
12999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
13000#[inline]
13001#[target_feature(enable = "avx512f,avx512vl")]
13002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13003#[cfg_attr(test, assert_instr(vpmovusdw))]
13004pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13005    transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k))
13006}
13007
13008/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13009///
13010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
13011#[inline]
13012#[target_feature(enable = "avx512f,avx512vl")]
13013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13014#[cfg_attr(test, assert_instr(vpmovusdw))]
13015pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13016    transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k))
13017}
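
// Illustrative sketch, not part of the upstream file: these conversions treat the
// inputs as unsigned, so anything above u16::MAX clamps to 0xFFFF and there is no
// negative clamp. Values are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cvtusepi32_epi16_saturation() -> __m128i {
    let a = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 70_000, 1);
    // Lanes, low to high: 1, 0xFFFF (70_000 clamped), 2, 3, 4, 5, 6, 7.
    _mm256_cvtusepi32_epi16(a)
}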
13018
13019/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
13020///
13021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
13022#[inline]
13023#[target_feature(enable = "avx512f")]
13024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13025#[cfg_attr(test, assert_instr(vpmovusdb))]
13026pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
13027    transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111))
13028}
13029
13030/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13031///
13032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
13033#[inline]
13034#[target_feature(enable = "avx512f")]
13035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13036#[cfg_attr(test, assert_instr(vpmovusdb))]
13037pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13038    transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k))
13039}
13040
13041/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13042///
13043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
13044#[inline]
13045#[target_feature(enable = "avx512f")]
13046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13047#[cfg_attr(test, assert_instr(vpmovusdb))]
13048pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13049    transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k))
13050}
13051
13052/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
13053///
13054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
13055#[inline]
13056#[target_feature(enable = "avx512f,avx512vl")]
13057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13058#[cfg_attr(test, assert_instr(vpmovusdb))]
13059pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
13060    transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111))
13061}
13062
13063/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13064///
13065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
13066#[inline]
13067#[target_feature(enable = "avx512f,avx512vl")]
13068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13069#[cfg_attr(test, assert_instr(vpmovusdb))]
13070pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13071    transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k))
13072}
13073
13074/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13075///
13076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
13077#[inline]
13078#[target_feature(enable = "avx512f,avx512vl")]
13079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13080#[cfg_attr(test, assert_instr(vpmovusdb))]
13081pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13082    transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k))
13083}
13084
13085/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
13086///
13087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
13088#[inline]
13089#[target_feature(enable = "avx512f,avx512vl")]
13090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13091#[cfg_attr(test, assert_instr(vpmovusdb))]
13092pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
13093    transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111))
13094}
13095
13096/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13097///
13098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
13099#[inline]
13100#[target_feature(enable = "avx512f,avx512vl")]
13101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13102#[cfg_attr(test, assert_instr(vpmovusdb))]
13103pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13104    transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k))
13105}
13106
13107/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13108///
13109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
13110#[inline]
13111#[target_feature(enable = "avx512f,avx512vl")]
13112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13113#[cfg_attr(test, assert_instr(vpmovusdb))]
13114pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13115    transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k))
13116}
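
// Illustrative sketch, not part of the upstream file: values above u8::MAX clamp to
// 0xFF when narrowing unsigned 32-bit lanes to unsigned bytes. Values are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cvtusepi32_epi8_saturation() -> __m128i {
    let a = _mm_set_epi32(4, 3, 1000, 2);
    // Bytes, low to high: 2, 0xFF (1000 clamped), 3, 4, followed by twelve zeroed bytes.
    _mm_cvtusepi32_epi8(a)
}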
13117
13118/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
13119///
13120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
13121#[inline]
13122#[target_feature(enable = "avx512f")]
13123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13124#[cfg_attr(test, assert_instr(vpmovusqd))]
13125pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
13126    transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111))
13127}
13128
13129/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13130///
13131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
13132#[inline]
13133#[target_feature(enable = "avx512f")]
13134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13135#[cfg_attr(test, assert_instr(vpmovusqd))]
13136pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13137    transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k))
13138}
13139
13140/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13141///
13142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
13143#[inline]
13144#[target_feature(enable = "avx512f")]
13145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13146#[cfg_attr(test, assert_instr(vpmovusqd))]
13147pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13148    transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k))
13149}
13150
13151/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
13152///
13153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
13154#[inline]
13155#[target_feature(enable = "avx512f,avx512vl")]
13156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13157#[cfg_attr(test, assert_instr(vpmovusqd))]
13158pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
13159    transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111))
13160}
13161
13162/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13163///
13164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
13165#[inline]
13166#[target_feature(enable = "avx512f,avx512vl")]
13167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13168#[cfg_attr(test, assert_instr(vpmovusqd))]
13169pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13170    transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k))
13171}
13172
13173/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13174///
13175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
13176#[inline]
13177#[target_feature(enable = "avx512f,avx512vl")]
13178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13179#[cfg_attr(test, assert_instr(vpmovusqd))]
13180pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13181    transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k))
13182}
13183
13184/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13190#[cfg_attr(test, assert_instr(vpmovusqd))]
13191pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
13192    transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111))
13193}
13194
13195/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13196///
13197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
13198#[inline]
13199#[target_feature(enable = "avx512f,avx512vl")]
13200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13201#[cfg_attr(test, assert_instr(vpmovusqd))]
13202pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13203    transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k))
13204}
13205
13206/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13207///
13208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
13209#[inline]
13210#[target_feature(enable = "avx512f,avx512vl")]
13211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13212#[cfg_attr(test, assert_instr(vpmovusqd))]
13213pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13214    transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k))
13215}
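
// Illustrative sketch, not part of the upstream file: combining unsigned saturation
// with a writemask; lanes with a clear mask bit keep the `src` element. Values and
// mask are arbitrary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cvtusepi64_epi32() -> __m256i {
    let a = _mm512_set1_epi64(u32::MAX as i64 + 1);
    let src = _mm256_set1_epi32(7);
    // Even lanes clamp to u32::MAX (0xFFFF_FFFF); odd lanes keep 7 from `src`.
    _mm512_mask_cvtusepi64_epi32(src, 0b0101_0101, a)
}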
13216
13217/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13218///
13219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
13220#[inline]
13221#[target_feature(enable = "avx512f")]
13222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13223#[cfg_attr(test, assert_instr(vpmovusqw))]
13224pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
13225    transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111))
13226}
13227
13228/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13229///
13230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
13231#[inline]
13232#[target_feature(enable = "avx512f")]
13233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13234#[cfg_attr(test, assert_instr(vpmovusqw))]
13235pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13236    transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k))
13237}
13238
13239/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13240///
13241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
13242#[inline]
13243#[target_feature(enable = "avx512f")]
13244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13245#[cfg_attr(test, assert_instr(vpmovusqw))]
13246pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13247    transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k))
13248}
13249
13250/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13251///
13252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
13253#[inline]
13254#[target_feature(enable = "avx512f,avx512vl")]
13255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13256#[cfg_attr(test, assert_instr(vpmovusqw))]
13257pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
13258    transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111))
13259}
13260
13261/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13262///
13263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
13264#[inline]
13265#[target_feature(enable = "avx512f,avx512vl")]
13266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13267#[cfg_attr(test, assert_instr(vpmovusqw))]
13268pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13269    transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k))
13270}
13271
13272/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13273///
13274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
13275#[inline]
13276#[target_feature(enable = "avx512f,avx512vl")]
13277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13278#[cfg_attr(test, assert_instr(vpmovusqw))]
13279pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13280    transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k))
13281}
13282
13283/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13284///
13285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
13286#[inline]
13287#[target_feature(enable = "avx512f,avx512vl")]
13288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13289#[cfg_attr(test, assert_instr(vpmovusqw))]
13290pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
13291    transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111))
13292}
13293
13294/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13295///
13296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
13297#[inline]
13298#[target_feature(enable = "avx512f,avx512vl")]
13299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13300#[cfg_attr(test, assert_instr(vpmovusqw))]
13301pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13302    transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k))
13303}
13304
13305/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13306///
13307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
13308#[inline]
13309#[target_feature(enable = "avx512f,avx512vl")]
13310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13311#[cfg_attr(test, assert_instr(vpmovusqw))]
13312pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13313    transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k))
13314}
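
// Illustrative sketch (hypothetical helper, not in the original source): the
// unsigned-saturating u64 -> u16 narrowing clamps anything above u16::MAX,
// while the zeromask clears unselected lanes. Assumes AVX-512F support.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_maskz_cvtusepi64_epi16() {
    let a = _mm512_set1_epi64(70_000); // larger than u16::MAX in every lane
    // Even lanes are converted (and saturate to 0xFFFF), odd lanes are zeroed.
    let r = _mm512_maskz_cvtusepi64_epi16(0b01010101, a);
    assert_eq!(_mm_extract_epi16::<0>(r), 0xFFFF);
    assert_eq!(_mm_extract_epi16::<1>(r), 0);
}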
13315
13316/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
13317///
13318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
13319#[inline]
13320#[target_feature(enable = "avx512f")]
13321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13322#[cfg_attr(test, assert_instr(vpmovusqb))]
13323pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
13324    transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111))
13325}
13326
13327/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13328///
13329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
13330#[inline]
13331#[target_feature(enable = "avx512f")]
13332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13333#[cfg_attr(test, assert_instr(vpmovusqb))]
13334pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13335    transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k))
13336}
13337
13338/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13339///
13340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
13341#[inline]
13342#[target_feature(enable = "avx512f")]
13343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13344#[cfg_attr(test, assert_instr(vpmovusqb))]
13345pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13346    transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k))
13347}
13348
13349/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
13350///
13351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
13352#[inline]
13353#[target_feature(enable = "avx512f,avx512vl")]
13354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13355#[cfg_attr(test, assert_instr(vpmovusqb))]
13356pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
13357    transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111))
13358}
13359
13360/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13361///
13362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
13363#[inline]
13364#[target_feature(enable = "avx512f,avx512vl")]
13365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13366#[cfg_attr(test, assert_instr(vpmovusqb))]
13367pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13368    transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k))
13369}
13370
13371/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13372///
13373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
13374#[inline]
13375#[target_feature(enable = "avx512f,avx512vl")]
13376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13377#[cfg_attr(test, assert_instr(vpmovusqb))]
13378pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13379    transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k))
13380}
13381
13382/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
13383///
13384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
13385#[inline]
13386#[target_feature(enable = "avx512f,avx512vl")]
13387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13388#[cfg_attr(test, assert_instr(vpmovusqb))]
13389pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
13390    transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111))
13391}
13392
13393/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13394///
13395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
13396#[inline]
13397#[target_feature(enable = "avx512f,avx512vl")]
13398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13399#[cfg_attr(test, assert_instr(vpmovusqb))]
13400pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13401    transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k))
13402}
13403
13404/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13405///
13406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
13407#[inline]
13408#[target_feature(enable = "avx512f,avx512vl")]
13409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13410#[cfg_attr(test, assert_instr(vpmovusqb))]
13411pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13412    transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k))
13413}
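
// Illustrative sketch (hypothetical helper, not in the original source): with
// the unsigned-saturating u64 -> u8 narrowing, values up to 255 pass through
// and everything larger clamps to u8::MAX. Assumes AVX-512F support.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_cvtusepi64_epi8_saturation() {
    let a = _mm512_set_epi64(1000, 256, 255, 128, 64, 2, 1, 0);
    let r = _mm512_cvtusepi64_epi8(a);
    // The eight converted bytes occupy the low 64 bits of the result.
    assert_eq!(_mm_extract_epi8::<0>(r), 0); // 0 is unchanged
    assert_eq!(_mm_extract_epi8::<5>(r), 255); // 255 is exactly representable
    assert_eq!(_mm_extract_epi8::<7>(r), 255); // 1000 saturates to u8::MAX
}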
13414
13415/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
13416///
13417/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13418/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13419/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13420/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13421/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13422/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13423///    
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)   
13425#[inline]
13426#[target_feature(enable = "avx512f")]
13427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13428#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
13429#[rustc_legacy_const_generics(1)]
13430pub unsafe fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
13431    static_assert_rounding!(ROUNDING);
13432    let a = a.as_f32x16();
13433    let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
13434    transmute(r)
13435}
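
// Illustrative sketch (hypothetical helper, not in the original source) of how
// the ROUNDING const parameter changes the result of the conversion above.
// Assumes AVX-512F support at runtime.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_cvt_roundps_epi32_modes() {
    let a = _mm512_set1_ps(1.5);
    // Round to nearest (even): 1.5 -> 2.
    let nearest =
        _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    // Truncate toward zero: 1.5 -> 1.
    let truncated = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(_mm512_cvtsi512_si32(nearest), 2);
    assert_eq!(_mm512_cvtsi512_si32(truncated), 1);
}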
13436
13437/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13438///
13439/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13440/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13441/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13442/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13443/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13444/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13445///    
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
13447#[inline]
13448#[target_feature(enable = "avx512f")]
13449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13450#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
13451#[rustc_legacy_const_generics(3)]
13452pub unsafe fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
13453    src: __m512i,
13454    k: __mmask16,
13455    a: __m512,
13456) -> __m512i {
13457    static_assert_rounding!(ROUNDING);
13458    let a = a.as_f32x16();
13459    let src = src.as_i32x16();
13460    let r = vcvtps2dq(a, src, k, ROUNDING);
13461    transmute(r)
13462}
13463
13464/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13465///
13466/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13467/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13468/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13469/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13470/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13471/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13472///    
13473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
13474#[inline]
13475#[target_feature(enable = "avx512f")]
13476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13477#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
13478#[rustc_legacy_const_generics(2)]
13479pub unsafe fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(
13480    k: __mmask16,
13481    a: __m512,
13482) -> __m512i {
13483    static_assert_rounding!(ROUNDING);
13484    let a = a.as_f32x16();
13485    let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
13486    transmute(r)
13487}
13488
13489/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
13490///
13491/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13492/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13493/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13494/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13495/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13496/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13497///    
13498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
13499#[inline]
13500#[target_feature(enable = "avx512f")]
13501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13502#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
13503#[rustc_legacy_const_generics(1)]
13504pub unsafe fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
13505    static_assert_rounding!(ROUNDING);
13506    let a = a.as_f32x16();
13507    let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
13508    transmute(r)
13509}
13510
13511/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13512///
13513/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13514/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13515/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13516/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13517/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13518/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13519///    
13520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
13521#[inline]
13522#[target_feature(enable = "avx512f")]
13523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13524#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
13525#[rustc_legacy_const_generics(3)]
13526pub unsafe fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
13527    src: __m512i,
13528    k: __mmask16,
13529    a: __m512,
13530) -> __m512i {
13531    static_assert_rounding!(ROUNDING);
13532    let a = a.as_f32x16();
13533    let src = src.as_u32x16();
13534    let r = vcvtps2udq(a, src, k, ROUNDING);
13535    transmute(r)
13536}
13537
13538/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13539///
13540/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13546///    
13547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
13548#[inline]
13549#[target_feature(enable = "avx512f")]
13550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13551#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
13552#[rustc_legacy_const_generics(2)]
13553pub unsafe fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(
13554    k: __mmask16,
13555    a: __m512,
13556) -> __m512i {
13557    static_assert_rounding!(ROUNDING);
13558    let a = a.as_f32x16();
13559    let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
13560    transmute(r)
13561}
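
// Illustrative sketch (hypothetical helper, not in the original source): the
// zeromask variant combined with an explicit rounding mode. Assumes AVX-512F
// support at runtime.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_maskz_cvt_roundps_epu32() {
    let a = _mm512_set1_ps(3.7);
    // Round toward negative infinity: 3.7 -> 3; odd lanes are zeroed by the mask.
    let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(
        0b01010101_01010101,
        a,
    );
    assert_eq!(_mm512_cvtsi512_si32(r), 3); // lane 0 is selected by the mask
}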
13562
13563/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
13564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13565///    
13566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)   
13567#[inline]
13568#[target_feature(enable = "avx512f")]
13569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13570#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
13571#[rustc_legacy_const_generics(1)]
13572pub unsafe fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
13573    static_assert_sae!(SAE);
13574    let a = a.as_f32x8();
13575    let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
13576    transmute(r)
13577}
13578
13579/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13580/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13581///
13582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
13583#[inline]
13584#[target_feature(enable = "avx512f")]
13585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13586#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
13587#[rustc_legacy_const_generics(3)]
13588pub unsafe fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(
13589    src: __m512d,
13590    k: __mmask8,
13591    a: __m256,
13592) -> __m512d {
13593    static_assert_sae!(SAE);
13594    let a = a.as_f32x8();
13595    let src = src.as_f64x8();
13596    let r = vcvtps2pd(a, src, k, SAE);
13597    transmute(r)
13598}
13599
13600/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13601/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13602///
13603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
13604#[inline]
13605#[target_feature(enable = "avx512f")]
13606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13607#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
13608#[rustc_legacy_const_generics(2)]
13609pub unsafe fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
13610    static_assert_sae!(SAE);
13611    let a = a.as_f32x8();
13612    let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
13613    transmute(r)
13614}
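
// Illustrative sketch (hypothetical helper, not in the original source): the
// f32 -> f64 widening is exact, so the SAE parameter only controls whether
// floating-point exceptions are reported. Assumes AVX-512F support.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_cvt_roundps_pd() {
    let a = _mm256_set1_ps(0.25);
    let r = _mm512_cvt_roundps_pd::<_MM_FROUND_NO_EXC>(a);
    assert_eq!(_mm512_cvtsd_f64(r), 0.25);
}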
13615
13616/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
13617///
13618/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13619/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13620/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13621/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13622/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13623/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13624///
13625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
13626#[inline]
13627#[target_feature(enable = "avx512f")]
13628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13629#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
13630#[rustc_legacy_const_generics(1)]
13631pub unsafe fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
13632    static_assert_rounding!(ROUNDING);
13633    let a = a.as_f64x8();
13634    let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
13635    transmute(r)
13636}
13637
13638/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13639///
13640/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13641/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13642/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13643/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13644/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13645/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13646///
13647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
13648#[inline]
13649#[target_feature(enable = "avx512f")]
13650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13651#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
13652#[rustc_legacy_const_generics(3)]
13653pub unsafe fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
13654    src: __m256i,
13655    k: __mmask8,
13656    a: __m512d,
13657) -> __m256i {
13658    static_assert_rounding!(ROUNDING);
13659    let a = a.as_f64x8();
13660    let src = src.as_i32x8();
13661    let r = vcvtpd2dq(a, src, k, ROUNDING);
13662    transmute(r)
13663}
13664
13665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13666///
13667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13673///
13674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
13675#[inline]
13676#[target_feature(enable = "avx512f")]
13677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
13679#[rustc_legacy_const_generics(2)]
13680pub unsafe fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(
13681    k: __mmask8,
13682    a: __m512d,
13683) -> __m256i {
13684    static_assert_rounding!(ROUNDING);
13685    let a = a.as_f64x8();
13686    let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
13687    transmute(r)
13688}
13689
13690/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
13691///
13692/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13693/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13694/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13695/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13696/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13697/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
13700#[inline]
13701#[target_feature(enable = "avx512f")]
13702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13703#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
13704#[rustc_legacy_const_generics(1)]
13705pub unsafe fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
13706    static_assert_rounding!(ROUNDING);
13707    let a = a.as_f64x8();
13708    let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
13709    transmute(r)
13710}
13711
13712/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13713///
13714/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13715/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13716/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13717/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13718/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13719/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
13722#[inline]
13723#[target_feature(enable = "avx512f")]
13724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13725#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
13726#[rustc_legacy_const_generics(3)]
13727pub unsafe fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
13728    src: __m256i,
13729    k: __mmask8,
13730    a: __m512d,
13731) -> __m256i {
13732    static_assert_rounding!(ROUNDING);
13733    let a = a.as_f64x8();
13734    let src = src.as_u32x8();
13735    let r = vcvtpd2udq(a, src, k, ROUNDING);
13736    transmute(r)
13737}
13738
13739/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13740///
13741/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13742/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13743/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13744/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13745/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13747///
13748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
13749#[inline]
13750#[target_feature(enable = "avx512f")]
13751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13752#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
13753#[rustc_legacy_const_generics(2)]
13754pub unsafe fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(
13755    k: __mmask8,
13756    a: __m512d,
13757) -> __m256i {
13758    static_assert_rounding!(ROUNDING);
13759    let a = a.as_f64x8();
13760    let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
13761    transmute(r)
13762}
13763
13764/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
13765///
13766/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13767/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13768/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13769/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13770/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13771/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13772///
13773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
13774#[inline]
13775#[target_feature(enable = "avx512f")]
13776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13777#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
13778#[rustc_legacy_const_generics(1)]
13779pub unsafe fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
13780    static_assert_rounding!(ROUNDING);
13781    let a = a.as_f64x8();
13782    let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
13783    transmute(r)
13784}
13785
13786/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13787///
13788/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13789/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13790/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13791/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13792/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13793/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13794///
13795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
13796#[inline]
13797#[target_feature(enable = "avx512f")]
13798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13799#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
13800#[rustc_legacy_const_generics(3)]
13801pub unsafe fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
13802    src: __m256,
13803    k: __mmask8,
13804    a: __m512d,
13805) -> __m256 {
13806    static_assert_rounding!(ROUNDING);
13807    let a = a.as_f64x8();
13808    let src = src.as_f32x8();
13809    let r = vcvtpd2ps(a, src, k, ROUNDING);
13810    transmute(r)
13811}
13812
13813/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13814///
13815/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13816/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13817/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13818/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13819/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13821///
13822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
13823#[inline]
13824#[target_feature(enable = "avx512f")]
13825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13826#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
13827#[rustc_legacy_const_generics(2)]
13828pub unsafe fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
13829    static_assert_rounding!(ROUNDING);
13830    let a = a.as_f64x8();
13831    let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
13832    transmute(r)
13833}
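
// Illustrative sketch (hypothetical helper, not in the original source): when
// an f64 value has no exact f32 representation, the rounding mode decides
// which neighbouring f32 is produced. Assumes AVX-512F support.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_cvt_roundpd_ps() {
    let a = _mm512_set1_pd(0.1); // 0.1 is not exactly representable in f32
    let down = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    assert!(_mm256_cvtss_f32(down) < _mm256_cvtss_f32(up));
}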
13834
13835/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
13836///
13837/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13838/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13839/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13840/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13841/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13842/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13843///
13844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
13845#[inline]
13846#[target_feature(enable = "avx512f")]
13847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13848#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
13849#[rustc_legacy_const_generics(1)]
13850pub unsafe fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
13851    static_assert_rounding!(ROUNDING);
13852    let a = a.as_i32x16();
13853    let r = vcvtdq2ps(a, ROUNDING);
13854    transmute(r)
13855}
13856
13857/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13858///
13859/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13860/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13861/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13862/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13863/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13864/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13865///
13866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
13867#[inline]
13868#[target_feature(enable = "avx512f")]
13869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13870#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
13871#[rustc_legacy_const_generics(3)]
13872pub unsafe fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
13873    src: __m512,
13874    k: __mmask16,
13875    a: __m512i,
13876) -> __m512 {
13877    static_assert_rounding!(ROUNDING);
13878    let a = a.as_i32x16();
13879    let r = vcvtdq2ps(a, ROUNDING);
13880    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
13881}
13882
13883/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13884///
13885/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13886/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13887/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13888/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13889/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13890/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13891///
13892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
13893#[inline]
13894#[target_feature(enable = "avx512f")]
13895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13896#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
13897#[rustc_legacy_const_generics(2)]
13898pub unsafe fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(
13899    k: __mmask16,
13900    a: __m512i,
13901) -> __m512 {
13902    static_assert_rounding!(ROUNDING);
13903    let a = a.as_i32x16();
13904    let r = vcvtdq2ps(a, ROUNDING);
13905    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
13906}
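
// Illustrative sketch (hypothetical helper, not in the original source):
// integers above 2^24 cannot all be represented exactly in f32, so the
// rounding mode picks one of the two neighbouring floats. Assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_cvt_roundepi32_ps() {
    let a = _mm512_set1_epi32(16_777_217); // 2^24 + 1
    let down = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(_mm512_cvtss_f32(down), 16_777_216.0);
    assert_eq!(_mm512_cvtss_f32(up), 16_777_218.0);
}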
13907
13908/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
13909///
13910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13916///
13917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
13918#[inline]
13919#[target_feature(enable = "avx512f")]
13920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13921#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
13922#[rustc_legacy_const_generics(1)]
13923pub unsafe fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
13924    static_assert_rounding!(ROUNDING);
13925    let a = a.as_u32x16();
13926    let r = vcvtudq2ps(a, ROUNDING);
13927    transmute(r)
13928}
13929
13930/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13931///
13932/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13933/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13934/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13935/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13936/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13937/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13938///
13939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
13940#[inline]
13941#[target_feature(enable = "avx512f")]
13942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13943#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
13944#[rustc_legacy_const_generics(3)]
13945pub unsafe fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
13946    src: __m512,
13947    k: __mmask16,
13948    a: __m512i,
13949) -> __m512 {
13950    static_assert_rounding!(ROUNDING);
13951    let a = a.as_u32x16();
13952    let r = vcvtudq2ps(a, ROUNDING);
13953    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
13954}
13955
13956/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
13957///
13958/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
13959/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
13960/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
13961/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
13962/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
13963/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
13964///
13965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
13966#[inline]
13967#[target_feature(enable = "avx512f")]
13968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13969#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
13970#[rustc_legacy_const_generics(2)]
13971pub unsafe fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(
13972    k: __mmask16,
13973    a: __m512i,
13974) -> __m512 {
13975    static_assert_rounding!(ROUNDING);
13976    let a = a.as_u32x16();
13977    let r = vcvtudq2ps(a, ROUNDING);
13978    transmute(simd_select_bitmask(k, r, f32x16::ZERO))
13979}
13980
13981/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
13982/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13983///    
13984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)   
13985#[inline]
13986#[target_feature(enable = "avx512f")]
13987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13988#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
13989#[rustc_legacy_const_generics(1)]
13990pub unsafe fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
13991    static_assert_sae!(SAE);
13992    let a = a.as_f32x16();
13993    let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
13994    transmute(r)
13995}
13996
13997/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
13998/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
13999///    
14000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)   
14001#[inline]
14002#[target_feature(enable = "avx512f")]
14003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14004#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14005#[rustc_legacy_const_generics(3)]
14006pub unsafe fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
14007    src: __m256i,
14008    k: __mmask16,
14009    a: __m512,
14010) -> __m256i {
14011    static_assert_sae!(SAE);
14012    let a = a.as_f32x16();
14013    let src = src.as_i16x16();
14014    let r = vcvtps2ph(a, SAE, src, k);
14015    transmute(r)
14016}
14017
14018/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14019/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14020///    
14021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
14022#[inline]
14023#[target_feature(enable = "avx512f")]
14024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14025#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14026#[rustc_legacy_const_generics(2)]
14027pub unsafe fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
14028    static_assert_sae!(SAE);
14029    let a = a.as_f32x16();
14030    let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
14031    transmute(r)
14032}
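
// Illustrative sketch (hypothetical helper, not in the original source): each
// f32 lane is narrowed to an IEEE binary16 bit pattern stored in a 16-bit
// integer lane of the result. Assumes an AVX-512F CPU (which also provides
// the AVX2 extract used to inspect the result).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn illustrative_cvt_roundps_ph() {
    let a = _mm512_set1_ps(1.0);
    let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
    assert_eq!(_mm256_extract_epi16::<0>(r), 0x3C00); // 1.0 as binary16
}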
14033
14034/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14035/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14036/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14037/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14038/// * [`_MM_FROUND_TO_POS_INF`] : round up
14039/// * [`_MM_FROUND_TO_ZERO`] : truncate
14040/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14041///    
14042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)   
14043#[inline]
14044#[target_feature(enable = "avx512f,avx512vl")]
14045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14046#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14047#[rustc_legacy_const_generics(3)]
14048pub unsafe fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
14049    src: __m128i,
14050    k: __mmask8,
14051    a: __m256,
14052) -> __m128i {
14053    static_assert_uimm_bits!(IMM8, 8);
14054    let a = a.as_f32x8();
14055    let src = src.as_i16x8();
14056    let r = vcvtps2ph256(a, IMM8, src, k);
14057    transmute(r)
14058}
14059
14060/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14061/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14062/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14063/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14064/// * [`_MM_FROUND_TO_POS_INF`] : round up
14065/// * [`_MM_FROUND_TO_ZERO`] : truncate
14066/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14067///    
14068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
14069#[inline]
14070#[target_feature(enable = "avx512f,avx512vl")]
14071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14072#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14073#[rustc_legacy_const_generics(2)]
14074pub unsafe fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
14075    static_assert_uimm_bits!(IMM8, 8);
14076    let a = a.as_f32x8();
14077    let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
14078    transmute(r)
14079}
14080
14081/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14082/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14083/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14084/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14085/// * [`_MM_FROUND_TO_POS_INF`] : round up
14086/// * [`_MM_FROUND_TO_ZERO`] : truncate
14087/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14088///    
14089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)   
14090#[inline]
14091#[target_feature(enable = "avx512f,avx512vl")]
14092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14093#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14094#[rustc_legacy_const_generics(3)]
14095pub unsafe fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(
14096    src: __m128i,
14097    k: __mmask8,
14098    a: __m128,
14099) -> __m128i {
14100    static_assert_uimm_bits!(IMM8, 8);
14101    let a = a.as_f32x4();
14102    let src = src.as_i16x8();
14103    let r = vcvtps2ph128(a, IMM8, src, k);
14104    transmute(r)
14105}
14106
14107/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14108/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14109/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14110/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14111/// * [`_MM_FROUND_TO_POS_INF`] : round up
14112/// * [`_MM_FROUND_TO_ZERO`] : truncate
14113/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14114///    
14115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
14116#[inline]
14117#[target_feature(enable = "avx512f,avx512vl")]
14118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14119#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14120#[rustc_legacy_const_generics(2)]
14121pub unsafe fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
14122    static_assert_uimm_bits!(IMM8, 8);
14123    let a = a.as_f32x4();
14124    let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
14125    transmute(r)
14126}
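
// Illustrative sketch (hypothetical helper, not in the original source): the
// 128-bit masked variant, where IMM8 selects the rounding mode directly.
// Assumes AVX-512F and AVX-512VL support at runtime.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn illustrative_mask_cvt_roundps_ph_128() {
    let a = _mm_set1_ps(2.0);
    let src = _mm_set1_epi16(0);
    // Only lane 0 is converted; 2.0 is 0x4000 in IEEE binary16.
    let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_TO_NEAREST_INT>(src, 0b0001, a);
    assert_eq!(_mm_extract_epi16::<0>(r), 0x4000);
    assert_eq!(_mm_extract_epi16::<1>(r), 0); // copied from `src`
}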
14127
14128/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
14129/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14130///    
14131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
14132#[inline]
14133#[target_feature(enable = "avx512f")]
14134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14135#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14136#[rustc_legacy_const_generics(1)]
14137pub unsafe fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
14138    static_assert_sae!(SAE);
14139    let a = a.as_f32x16();
14140    let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
14141    transmute(r)
14142}
14143
14144/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14145/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14146///    
14147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
14148#[inline]
14149#[target_feature(enable = "avx512f")]
14150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14151#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14152#[rustc_legacy_const_generics(3)]
14153pub unsafe fn _mm512_mask_cvtps_ph<const SAE: i32>(
14154    src: __m256i,
14155    k: __mmask16,
14156    a: __m512,
14157) -> __m256i {
14158    static_assert_sae!(SAE);
14159    let a = a.as_f32x16();
14160    let src = src.as_i16x16();
14161    let r = vcvtps2ph(a, SAE, src, k);
14162    transmute(r)
14163}
14164
14165/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14166/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14167///    
14168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
14169#[inline]
14170#[target_feature(enable = "avx512f")]
14171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14172#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
14173#[rustc_legacy_const_generics(2)]
14174pub unsafe fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
14175    static_assert_sae!(SAE);
14176    let a = a.as_f32x16();
14177    let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
14178    transmute(r)
14179}
14180
14181/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14185/// * [`_MM_FROUND_TO_POS_INF`] : round up
14186/// * [`_MM_FROUND_TO_ZERO`] : truncate
14187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14188///    
14189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
14190#[inline]
14191#[target_feature(enable = "avx512f,avx512vl")]
14192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14193#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14194#[rustc_legacy_const_generics(3)]
14195pub unsafe fn _mm256_mask_cvtps_ph<const IMM8: i32>(
14196    src: __m128i,
14197    k: __mmask8,
14198    a: __m256,
14199) -> __m128i {
14200    static_assert_uimm_bits!(IMM8, 8);
14201    let a = a.as_f32x8();
14202    let src = src.as_i16x8();
14203    let r = vcvtps2ph256(a, IMM8, src, k);
14204    transmute(r)
14205}
14206
14207/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14208/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14209/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14210/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14211/// * [`_MM_FROUND_TO_POS_INF`] : round up
14212/// * [`_MM_FROUND_TO_ZERO`] : truncate
14213/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14214///    
14215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
14216#[inline]
14217#[target_feature(enable = "avx512f,avx512vl")]
14218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14219#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14220#[rustc_legacy_const_generics(2)]
14221pub unsafe fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
14222    static_assert_uimm_bits!(IMM8, 8);
14223    let a = a.as_f32x8();
14224    let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
14225    transmute(r)
14226}
14227
14228/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14229/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14230/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14231/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14232/// * [`_MM_FROUND_TO_POS_INF`] : round up
14233/// * [`_MM_FROUND_TO_ZERO`] : truncate
14234/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14235///    
14236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
14237#[inline]
14238#[target_feature(enable = "avx512f,avx512vl")]
14239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14240#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14241#[rustc_legacy_const_generics(3)]
14242pub unsafe fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
14243    static_assert_uimm_bits!(IMM8, 8);
14244    let a = a.as_f32x4();
14245    let src = src.as_i16x8();
14246    let r = vcvtps2ph128(a, IMM8, src, k);
14247    transmute(r)
14248}
14249
14250/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14251/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
14252/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
14253/// * [`_MM_FROUND_TO_NEG_INF`] : round down
14254/// * [`_MM_FROUND_TO_POS_INF`] : round up
14255/// * [`_MM_FROUND_TO_ZERO`] : truncate
14256/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14257///    
14258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
14259#[inline]
14260#[target_feature(enable = "avx512f,avx512vl")]
14261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14262#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
14263#[rustc_legacy_const_generics(2)]
14264pub unsafe fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
14265    static_assert_uimm_bits!(IMM8, 8);
14266    let a = a.as_f32x4();
14267    let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
14268    transmute(r)
14269}
14270
14271/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14272/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14273///    
14274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
14275#[inline]
14276#[target_feature(enable = "avx512f")]
14277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14278#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
14279#[rustc_legacy_const_generics(1)]
14280pub unsafe fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
14281    static_assert_sae!(SAE);
14282    let a = a.as_i16x16();
14283    let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
14284    transmute(r)
14285}
14286
14287/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14288/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14289///    
14290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
14291#[inline]
14292#[target_feature(enable = "avx512f")]
14293#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14294#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
14295#[rustc_legacy_const_generics(3)]
14296pub unsafe fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(
14297    src: __m512,
14298    k: __mmask16,
14299    a: __m256i,
14300) -> __m512 {
14301    static_assert_sae!(SAE);
14302    let a = a.as_i16x16();
14303    let src = src.as_f32x16();
14304    let r = vcvtph2ps(a, src, k, SAE);
14305    transmute(r)
14306}
14307
14308/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14309/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14310///    
14311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
14312#[inline]
14313#[target_feature(enable = "avx512f")]
14314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14315#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
14316#[rustc_legacy_const_generics(2)]
14317pub unsafe fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
14318    static_assert_sae!(SAE);
14319    let a = a.as_i16x16();
14320    let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
14321    transmute(r)
14322}
14323
14324/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
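///
/// Usage sketch (an illustrative round trip through half precision; assumes `avx512f`
/// is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let f = _mm512_set1_ps(0.25);
///     let h = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(f); // f32 -> f16
///     let back = _mm512_cvtph_ps(h); // f16 -> f32
///     // 0.25 is exactly representable in f16, so `back` compares equal to `f`.
/// }
/// ```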
14327#[inline]
14328#[target_feature(enable = "avx512f")]
14329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14330#[cfg_attr(test, assert_instr(vcvtph2ps))]
14331pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
14332    transmute(vcvtph2ps(
14333        a.as_i16x16(),
14334        f32x16::ZERO,
14335        0b11111111_11111111,
14336        _MM_FROUND_NO_EXC,
14337    ))
14338}
14339
14340/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14341///
14342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
14343#[inline]
14344#[target_feature(enable = "avx512f")]
14345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14346#[cfg_attr(test, assert_instr(vcvtph2ps))]
14347pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
14348    transmute(vcvtph2ps(
14349        a.as_i16x16(),
14350        src.as_f32x16(),
14351        k,
14352        _MM_FROUND_NO_EXC,
14353    ))
14354}
14355
14356/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14357///
14358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
14359#[inline]
14360#[target_feature(enable = "avx512f")]
14361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14362#[cfg_attr(test, assert_instr(vcvtph2ps))]
14363pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
14364    transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC))
14365}
14366
14367/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14368///
14369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
14370#[inline]
14371#[target_feature(enable = "avx512f,avx512vl")]
14372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14373#[cfg_attr(test, assert_instr(vcvtph2ps))]
14374pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
14375    let convert = _mm256_cvtph_ps(a);
14376    transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
14377}
14378
14379/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
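///
/// Usage sketch (illustrative only; assumes `avx512f` and `avx512vl` are available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // 0x3C00 is 1.0 in IEEE half precision.
///     let h = _mm_set1_epi16(0x3C00);
///     // The four low lanes become 1.0f32; lanes with a clear mask bit are zeroed.
///     let r = _mm256_maskz_cvtph_ps(0b0000_1111, h);
/// }
/// ```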
14382#[inline]
14383#[target_feature(enable = "avx512f,avx512vl")]
14384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14385#[cfg_attr(test, assert_instr(vcvtph2ps))]
14386pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
14387    let convert = _mm256_cvtph_ps(a);
14388    transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
14389}
14390
14391/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14392///
14393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
14394#[inline]
14395#[target_feature(enable = "avx512f,avx512vl")]
14396#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14397#[cfg_attr(test, assert_instr(vcvtph2ps))]
14398pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
14399    let convert = _mm_cvtph_ps(a);
14400    transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
14401}
14402
14403/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14404///
14405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
14406#[inline]
14407#[target_feature(enable = "avx512f,avx512vl")]
14408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14409#[cfg_attr(test, assert_instr(vcvtph2ps))]
14410pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
14411    let convert = _mm_cvtph_ps(a);
14412    transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
14413}
14414
14415/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
14416/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14417///    
14418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)   
14419#[inline]
14420#[target_feature(enable = "avx512f")]
14421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14422#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
14423#[rustc_legacy_const_generics(1)]
14424pub unsafe fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
14425    static_assert_sae!(SAE);
14426    let a = a.as_f32x16();
14427    let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
14428    transmute(r)
14429}
14430
14431/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14432/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14433///
14434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
14435#[inline]
14436#[target_feature(enable = "avx512f")]
14437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14438#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
14439#[rustc_legacy_const_generics(3)]
14440pub unsafe fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
14441    src: __m512i,
14442    k: __mmask16,
14443    a: __m512,
14444) -> __m512i {
14445    static_assert_sae!(SAE);
14446    let a = a.as_f32x16();
14447    let src = src.as_i32x16();
14448    let r = vcvttps2dq(a, src, k, SAE);
14449    transmute(r)
14450}
14451
14452/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14453/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14454///
14455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
14456#[inline]
14457#[target_feature(enable = "avx512f")]
14458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14459#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
14460#[rustc_legacy_const_generics(2)]
14461pub unsafe fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
14462    static_assert_sae!(SAE);
14463    let a = a.as_f32x16();
14464    let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
14465    transmute(r)
14466}
14467
14468/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
14469/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14470///    
14471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)   
14472#[inline]
14473#[target_feature(enable = "avx512f")]
14474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14475#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
14476#[rustc_legacy_const_generics(1)]
14477pub unsafe fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
14478    static_assert_sae!(SAE);
14479    let a = a.as_f32x16();
14480    let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
14481    transmute(r)
14482}
14483
14484/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14485/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14486///
14487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
14488#[inline]
14489#[target_feature(enable = "avx512f")]
14490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14491#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
14492#[rustc_legacy_const_generics(3)]
14493pub unsafe fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
14494    src: __m512i,
14495    k: __mmask16,
14496    a: __m512,
14497) -> __m512i {
14498    static_assert_sae!(SAE);
14499    let a = a.as_f32x16();
14500    let src = src.as_u32x16();
14501    let r = vcvttps2udq(a, src, k, SAE);
14502    transmute(r)
14503}
14504
14505/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14506/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14507///
14508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
14509#[inline]
14510#[target_feature(enable = "avx512f")]
14511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14512#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
14513#[rustc_legacy_const_generics(2)]
14514pub unsafe fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
14515    static_assert_sae!(SAE);
14516    let a = a.as_f32x16();
14517    let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
14518    transmute(r)
14519}
14520
14521/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
14522/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14523///    
14524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
14525#[inline]
14526#[target_feature(enable = "avx512f")]
14527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14528#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
14529#[rustc_legacy_const_generics(1)]
14530pub unsafe fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
14531    static_assert_sae!(SAE);
14532    let a = a.as_f64x8();
14533    let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
14534    transmute(r)
14535}
14536
14537/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14539///
14540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
14541#[inline]
14542#[target_feature(enable = "avx512f")]
14543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14544#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
14545#[rustc_legacy_const_generics(3)]
14546pub unsafe fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
14547    src: __m256i,
14548    k: __mmask8,
14549    a: __m512d,
14550) -> __m256i {
14551    static_assert_sae!(SAE);
14552    let a = a.as_f64x8();
14553    let src = src.as_i32x8();
14554    let r = vcvttpd2dq(a, src, k, SAE);
14555    transmute(r)
14556}
14557
14558/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14559/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14560///
14561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
14562#[inline]
14563#[target_feature(enable = "avx512f")]
14564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14565#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
14566#[rustc_legacy_const_generics(2)]
14567pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
14568    static_assert_sae!(SAE);
14569    let a = a.as_f64x8();
14570    let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
14571    transmute(r)
14572}
14573
14574/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
14575/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14576///    
14577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
14578#[inline]
14579#[target_feature(enable = "avx512f")]
14580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14581#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
14582#[rustc_legacy_const_generics(1)]
14583pub unsafe fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
14584    static_assert_sae!(SAE);
14585    let a = a.as_f64x8();
14586    let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
14587    transmute(r)
14588}
14589
14590/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14591/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14592///
14593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
14594#[inline]
14595#[target_feature(enable = "avx512f")]
14596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14597#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
14598#[rustc_legacy_const_generics(3)]
14599pub unsafe fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
14600    src: __m256i,
14601    k: __mmask8,
14602    a: __m512d,
14603) -> __m256i {
14604    static_assert_sae!(SAE);
14605    let a = a.as_f64x8();
14606    let src = src.as_i32x8();
14607    let r = vcvttpd2udq(a, src, k, SAE);
14608    transmute(r)
14609}
14610
14611/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.    
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
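///
/// Usage sketch (illustrative only; assumes `avx512f` is available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(-2.9);
///     // Truncation rounds toward zero, so every lane becomes -2.
///     let r = _mm512_cvttps_epi32(a);
/// }
/// ```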
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14617#[cfg_attr(test, assert_instr(vcvttps2dq))]
14618pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
14619    transmute(vcvttps2dq(
14620        a.as_f32x16(),
14621        i32x16::ZERO,
14622        0b11111111_11111111,
14623        _MM_FROUND_CUR_DIRECTION,
14624    ))
14625}
14626
14627/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14628///
14629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
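///
/// Usage sketch showing the writemask behaviour (illustrative only; assumes `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     let a = _mm512_set1_ps(7.9);
///     // Even lanes receive the truncated value 7; odd lanes keep -1 from `src`.
///     let r = _mm512_mask_cvttps_epi32(src, 0b0101_0101_0101_0101, a);
/// }
/// ```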
14630#[inline]
14631#[target_feature(enable = "avx512f")]
14632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14633#[cfg_attr(test, assert_instr(vcvttps2dq))]
14634pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
14635    transmute(vcvttps2dq(
14636        a.as_f32x16(),
14637        src.as_i32x16(),
14638        k,
14639        _MM_FROUND_CUR_DIRECTION,
14640    ))
14641}
14642
14643/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14644///
14645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
14646#[inline]
14647#[target_feature(enable = "avx512f")]
14648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14649#[cfg_attr(test, assert_instr(vcvttps2dq))]
14650pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
14651    transmute(vcvttps2dq(
14652        a.as_f32x16(),
14653        i32x16::ZERO,
14654        k,
14655        _MM_FROUND_CUR_DIRECTION,
14656    ))
14657}
14658
14659/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14660///
14661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
14662#[inline]
14663#[target_feature(enable = "avx512f,avx512vl")]
14664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14665#[cfg_attr(test, assert_instr(vcvttps2dq))]
14666pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
14667    transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k))
14668}
14669
14670/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14671///
14672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
14673#[inline]
14674#[target_feature(enable = "avx512f,avx512vl")]
14675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14676#[cfg_attr(test, assert_instr(vcvttps2dq))]
14677pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
14678    transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k))
14679}
14680
14681/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14682///
14683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
14684#[inline]
14685#[target_feature(enable = "avx512f,avx512vl")]
14686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14687#[cfg_attr(test, assert_instr(vcvttps2dq))]
14688pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
14689    transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k))
14690}
14691
14692/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14693///
14694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
14695#[inline]
14696#[target_feature(enable = "avx512f,avx512vl")]
14697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14698#[cfg_attr(test, assert_instr(vcvttps2dq))]
14699pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
14700    transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k))
14701}
14702
14703/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.    
14704///
14705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
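///
/// Usage sketch (illustrative only; assumes `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(3.7);
///     // Each lane is truncated toward zero and stored as an unsigned 32-bit integer.
///     let r = _mm512_cvttps_epu32(a); // every lane holds 3
/// }
/// ```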
14706#[inline]
14707#[target_feature(enable = "avx512f")]
14708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14709#[cfg_attr(test, assert_instr(vcvttps2udq))]
14710pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
14711    transmute(vcvttps2udq(
14712        a.as_f32x16(),
14713        u32x16::ZERO,
14714        0b11111111_11111111,
14715        _MM_FROUND_CUR_DIRECTION,
14716    ))
14717}
14718
14719/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14720///
14721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
14722#[inline]
14723#[target_feature(enable = "avx512f")]
14724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14725#[cfg_attr(test, assert_instr(vcvttps2udq))]
14726pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
14727    transmute(vcvttps2udq(
14728        a.as_f32x16(),
14729        src.as_u32x16(),
14730        k,
14731        _MM_FROUND_CUR_DIRECTION,
14732    ))
14733}
14734
14735/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14736///
14737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
14738#[inline]
14739#[target_feature(enable = "avx512f")]
14740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14741#[cfg_attr(test, assert_instr(vcvttps2udq))]
14742pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
14743    transmute(vcvttps2udq(
14744        a.as_f32x16(),
14745        u32x16::ZERO,
14746        k,
14747        _MM_FROUND_CUR_DIRECTION,
14748    ))
14749}
14750
14751/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.    
14752///
14753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
14754#[inline]
14755#[target_feature(enable = "avx512f,avx512vl")]
14756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14757#[cfg_attr(test, assert_instr(vcvttps2udq))]
14758pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
14759    transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111))
14760}
14761
14762/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14763///
14764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
14765#[inline]
14766#[target_feature(enable = "avx512f,avx512vl")]
14767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14768#[cfg_attr(test, assert_instr(vcvttps2udq))]
14769pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
14770    transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k))
14771}
14772
14773/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
14776#[inline]
14777#[target_feature(enable = "avx512f,avx512vl")]
14778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14779#[cfg_attr(test, assert_instr(vcvttps2udq))]
14780pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
14781    transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k))
14782}
14783
14784/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.    
14785///
14786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
14787#[inline]
14788#[target_feature(enable = "avx512f,avx512vl")]
14789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14790#[cfg_attr(test, assert_instr(vcvttps2udq))]
14791pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i {
14792    transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111))
14793}
14794
14795/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14796///
14797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
14798#[inline]
14799#[target_feature(enable = "avx512f,avx512vl")]
14800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14801#[cfg_attr(test, assert_instr(vcvttps2udq))]
14802pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
14803    transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k))
14804}
14805
14806/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14807///
14808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
14809#[inline]
14810#[target_feature(enable = "avx512f,avx512vl")]
14811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14812#[cfg_attr(test, assert_instr(vcvttps2udq))]
14813pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
14814    transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k))
14815}
14816
14817/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14819///
14820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
14821#[inline]
14822#[target_feature(enable = "avx512f")]
14823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14824#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
14825#[rustc_legacy_const_generics(2)]
14826pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
14827    static_assert_sae!(SAE);
14828    let a = a.as_f64x8();
14829    let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
14830    transmute(r)
14831}
14832
14833/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.  
14834///
14835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
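///
/// Usage sketch (illustrative only; assumes `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(-1.5);
///     // Eight f64 lanes narrow to eight truncated i32 lanes in a 256-bit result.
///     let r: __m256i = _mm512_cvttpd_epi32(a); // every lane holds -1
/// }
/// ```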
14836#[inline]
14837#[target_feature(enable = "avx512f")]
14838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14839#[cfg_attr(test, assert_instr(vcvttpd2dq))]
14840pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
14841    transmute(vcvttpd2dq(
14842        a.as_f64x8(),
14843        i32x8::ZERO,
14844        0b11111111,
14845        _MM_FROUND_CUR_DIRECTION,
14846    ))
14847}
14848
14849/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14850///
14851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
14852#[inline]
14853#[target_feature(enable = "avx512f")]
14854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14855#[cfg_attr(test, assert_instr(vcvttpd2dq))]
14856pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
14857    transmute(vcvttpd2dq(
14858        a.as_f64x8(),
14859        src.as_i32x8(),
14860        k,
14861        _MM_FROUND_CUR_DIRECTION,
14862    ))
14863}
14864
14865/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14866///
14867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
14868#[inline]
14869#[target_feature(enable = "avx512f")]
14870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14871#[cfg_attr(test, assert_instr(vcvttpd2dq))]
14872pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
14873    transmute(vcvttpd2dq(
14874        a.as_f64x8(),
14875        i32x8::ZERO,
14876        k,
14877        _MM_FROUND_CUR_DIRECTION,
14878    ))
14879}
14880
14881/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14882///
14883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
14884#[inline]
14885#[target_feature(enable = "avx512f,avx512vl")]
14886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14887#[cfg_attr(test, assert_instr(vcvttpd2dq))]
14888pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
14889    transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k))
14890}
14891
14892/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14893///
14894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
14895#[inline]
14896#[target_feature(enable = "avx512f,avx512vl")]
14897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14898#[cfg_attr(test, assert_instr(vcvttpd2dq))]
14899pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
14900    transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k))
14901}
14902
14903/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
14906#[inline]
14907#[target_feature(enable = "avx512f,avx512vl")]
14908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14909#[cfg_attr(test, assert_instr(vcvttpd2dq))]
14910pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
14911    transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k))
14912}
14913
14914/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14915///
14916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
14917#[inline]
14918#[target_feature(enable = "avx512f,avx512vl")]
14919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14920#[cfg_attr(test, assert_instr(vcvttpd2dq))]
14921pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
14922    transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k))
14923}
14924
14925/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.    
14926///
14927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
14928#[inline]
14929#[target_feature(enable = "avx512f")]
14930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14931#[cfg_attr(test, assert_instr(vcvttpd2udq))]
14932pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
14933    transmute(vcvttpd2udq(
14934        a.as_f64x8(),
14935        i32x8::ZERO,
14936        0b11111111,
14937        _MM_FROUND_CUR_DIRECTION,
14938    ))
14939}
14940
14941/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14942///
14943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
14944#[inline]
14945#[target_feature(enable = "avx512f")]
14946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14947#[cfg_attr(test, assert_instr(vcvttpd2udq))]
14948pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
14949    transmute(vcvttpd2udq(
14950        a.as_f64x8(),
14951        src.as_i32x8(),
14952        k,
14953        _MM_FROUND_CUR_DIRECTION,
14954    ))
14955}
14956
14957/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14958///
14959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
14960#[inline]
14961#[target_feature(enable = "avx512f")]
14962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14963#[cfg_attr(test, assert_instr(vcvttpd2udq))]
14964pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
14965    transmute(vcvttpd2udq(
14966        a.as_f64x8(),
14967        i32x8::ZERO,
14968        k,
14969        _MM_FROUND_CUR_DIRECTION,
14970    ))
14971}
14972
14973/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.    
14974///
14975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
14976#[inline]
14977#[target_feature(enable = "avx512f,avx512vl")]
14978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14979#[cfg_attr(test, assert_instr(vcvttpd2udq))]
14980pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
14981    transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111))
14982}
14983
14984/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14985///
14986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
14987#[inline]
14988#[target_feature(enable = "avx512f,avx512vl")]
14989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14990#[cfg_attr(test, assert_instr(vcvttpd2udq))]
14991pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
14992    transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k))
14993}
14994
14995/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14996///
14997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
14998#[inline]
14999#[target_feature(enable = "avx512f,avx512vl")]
15000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15001#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15002pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
15003    transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k))
15004}
15005
15006/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.    
15007///
15008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
15009#[inline]
15010#[target_feature(enable = "avx512f,avx512vl")]
15011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15012#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15013pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
15014    transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111))
15015}
15016
15017/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15018///
15019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
15020#[inline]
15021#[target_feature(enable = "avx512f,avx512vl")]
15022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15023#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15024pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
15025    transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k))
15026}
15027
15028/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15029///
15030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
15031#[inline]
15032#[target_feature(enable = "avx512f,avx512vl")]
15033#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15034#[cfg_attr(test, assert_instr(vcvttpd2udq))]
15035pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
15036    transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k))
15037}
15038
15039/// Returns vector of type `__m512d` with all elements set to zero.
15040///
15041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
15042#[inline]
15043#[target_feature(enable = "avx512f")]
15044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15045#[cfg_attr(test, assert_instr(vxorps))]
15046pub unsafe fn _mm512_setzero_pd() -> __m512d {
15047    // All-0 is a properly initialized __m512d
15048    const { mem::zeroed() }
15049}
15050
15051/// Returns vector of type `__m512` with all elements set to zero.
15052///
15053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
15054#[inline]
15055#[target_feature(enable = "avx512f")]
15056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15057#[cfg_attr(test, assert_instr(vxorps))]
15058pub unsafe fn _mm512_setzero_ps() -> __m512 {
15059    // All-0 is a properly initialized __m512
15060    const { mem::zeroed() }
15061}
15062
15063/// Return vector of type `__m512` with all elements set to zero.
15064///
15065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
15066#[inline]
15067#[target_feature(enable = "avx512f")]
15068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15069#[cfg_attr(test, assert_instr(vxorps))]
15070pub unsafe fn _mm512_setzero() -> __m512 {
15071    // All-0 is a properly initialized __m512
15072    const { mem::zeroed() }
15073}
15074
15075/// Returns vector of type `__m512i` with all elements set to zero.
15076///
15077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
15078#[inline]
15079#[target_feature(enable = "avx512f")]
15080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15081#[cfg_attr(test, assert_instr(vxorps))]
15082pub unsafe fn _mm512_setzero_si512() -> __m512i {
15083    // All-0 is a properly initialized __m512i
15084    const { mem::zeroed() }
15085}
15086
15087/// Return vector of type `__m512i` with all elements set to zero.
15088///
15089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
15090#[inline]
15091#[target_feature(enable = "avx512f")]
15092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15093#[cfg_attr(test, assert_instr(vxorps))]
15094pub unsafe fn _mm512_setzero_epi32() -> __m512i {
15095    // All-0 is a properly initialized __m512i
15096    const { mem::zeroed() }
15097}
15098
15099/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
15100/// order.
15101///
15102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
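///
/// Usage sketch (illustrative only; assumes `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // `setr` takes its arguments in memory order: the first argument becomes
///     // element 0, so this is equivalent to _mm512_set_epi32(15, 14, ..., 1, 0).
///     let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// }
/// ```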
15103#[inline]
15104#[target_feature(enable = "avx512f")]
15105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15106pub unsafe fn _mm512_setr_epi32(
15107    e15: i32,
15108    e14: i32,
15109    e13: i32,
15110    e12: i32,
15111    e11: i32,
15112    e10: i32,
15113    e9: i32,
15114    e8: i32,
15115    e7: i32,
15116    e6: i32,
15117    e5: i32,
15118    e4: i32,
15119    e3: i32,
15120    e2: i32,
15121    e1: i32,
15122    e0: i32,
15123) -> __m512i {
15124    let r = i32x16::new(
15125        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
15126    );
15127    transmute(r)
15128}
15129
15130/// Set packed 8-bit integers in dst with the supplied values.
15131///
15132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
15133#[inline]
15134#[target_feature(enable = "avx512f")]
15135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15136pub unsafe fn _mm512_set_epi8(
15137    e63: i8,
15138    e62: i8,
15139    e61: i8,
15140    e60: i8,
15141    e59: i8,
15142    e58: i8,
15143    e57: i8,
15144    e56: i8,
15145    e55: i8,
15146    e54: i8,
15147    e53: i8,
15148    e52: i8,
15149    e51: i8,
15150    e50: i8,
15151    e49: i8,
15152    e48: i8,
15153    e47: i8,
15154    e46: i8,
15155    e45: i8,
15156    e44: i8,
15157    e43: i8,
15158    e42: i8,
15159    e41: i8,
15160    e40: i8,
15161    e39: i8,
15162    e38: i8,
15163    e37: i8,
15164    e36: i8,
15165    e35: i8,
15166    e34: i8,
15167    e33: i8,
15168    e32: i8,
15169    e31: i8,
15170    e30: i8,
15171    e29: i8,
15172    e28: i8,
15173    e27: i8,
15174    e26: i8,
15175    e25: i8,
15176    e24: i8,
15177    e23: i8,
15178    e22: i8,
15179    e21: i8,
15180    e20: i8,
15181    e19: i8,
15182    e18: i8,
15183    e17: i8,
15184    e16: i8,
15185    e15: i8,
15186    e14: i8,
15187    e13: i8,
15188    e12: i8,
15189    e11: i8,
15190    e10: i8,
15191    e9: i8,
15192    e8: i8,
15193    e7: i8,
15194    e6: i8,
15195    e5: i8,
15196    e4: i8,
15197    e3: i8,
15198    e2: i8,
15199    e1: i8,
15200    e0: i8,
15201) -> __m512i {
15202    let r = i8x64::new(
15203        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
15204        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37,
15205        e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55,
15206        e56, e57, e58, e59, e60, e61, e62, e63,
15207    );
15208    transmute(r)
15209}
15210
15211/// Set packed 16-bit integers in dst with the supplied values.
15212///
15213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
15214#[inline]
15215#[target_feature(enable = "avx512f")]
15216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15217pub unsafe fn _mm512_set_epi16(
15218    e31: i16,
15219    e30: i16,
15220    e29: i16,
15221    e28: i16,
15222    e27: i16,
15223    e26: i16,
15224    e25: i16,
15225    e24: i16,
15226    e23: i16,
15227    e22: i16,
15228    e21: i16,
15229    e20: i16,
15230    e19: i16,
15231    e18: i16,
15232    e17: i16,
15233    e16: i16,
15234    e15: i16,
15235    e14: i16,
15236    e13: i16,
15237    e12: i16,
15238    e11: i16,
15239    e10: i16,
15240    e9: i16,
15241    e8: i16,
15242    e7: i16,
15243    e6: i16,
15244    e5: i16,
15245    e4: i16,
15246    e3: i16,
15247    e2: i16,
15248    e1: i16,
15249    e0: i16,
15250) -> __m512i {
15251    let r = i16x32::new(
15252        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
15253        e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
15254    );
15255    transmute(r)
15256}
15257
15258/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
15259///
15260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
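///
/// A minimal usage sketch (hypothetical values; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). The four
/// values repeat across all 16 lanes, with `a` in the lowest lane of each group:
///
/// ```ignore
/// let v = unsafe { _mm512_set4_epi32(3, 2, 1, 0) };
/// // Lanes read 0, 1, 2, 3, 0, 1, 2, 3, ... starting from lane 0.
/// ```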
15261#[inline]
15262#[target_feature(enable = "avx512f")]
15263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15264pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
15265    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
15266}
15267
15268/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
15269///
15270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
15271#[inline]
15272#[target_feature(enable = "avx512f")]
15273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15274pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
15275    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
15276}
15277
15278/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
15279///
15280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
15281#[inline]
15282#[target_feature(enable = "avx512f")]
15283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15284pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
15285    _mm512_set_pd(d, c, b, a, d, c, b, a)
15286}
15287
15288/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
15289///
15290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
15291#[inline]
15292#[target_feature(enable = "avx512f")]
15293#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15294pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
15295    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
15296}
15297
15298/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
15299///
15300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
15301#[inline]
15302#[target_feature(enable = "avx512f")]
15303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15304pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
15305    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
15306}
15307
15308/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
15309///
15310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
15311#[inline]
15312#[target_feature(enable = "avx512f")]
15313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15314pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
15315    _mm512_set_pd(a, b, c, d, a, b, c, d)
15316}
15317
15318/// Set packed 64-bit integers in dst with the supplied values.
15319///
15320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
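///
/// A minimal usage sketch (hypothetical values; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). The first
/// argument is stored in the highest (7th) lane:
///
/// ```ignore
/// let v = unsafe { _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0) };
/// // Lane 0 holds 0 and lane 7 holds 7.
/// ```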
15321#[inline]
15322#[target_feature(enable = "avx512f")]
15323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15324pub unsafe fn _mm512_set_epi64(
15325    e0: i64,
15326    e1: i64,
15327    e2: i64,
15328    e3: i64,
15329    e4: i64,
15330    e5: i64,
15331    e6: i64,
15332    e7: i64,
15333) -> __m512i {
15334    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
15335}
15336
15337/// Set packed 64-bit integers in dst with the supplied values in reverse order.
15338///
15339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
15340#[inline]
15341#[target_feature(enable = "avx512f")]
15342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15343pub unsafe fn _mm512_setr_epi64(
15344    e0: i64,
15345    e1: i64,
15346    e2: i64,
15347    e3: i64,
15348    e4: i64,
15349    e5: i64,
15350    e6: i64,
15351    e7: i64,
15352) -> __m512i {
15353    let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
15354    transmute(r)
15355}
15356
15357/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15358///
15359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
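///
/// A minimal usage sketch (hypothetical buffer; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). With
/// `SCALE = 8`, each 32-bit index selects one `f64` element:
///
/// ```ignore
/// let data: [f64; 16] = core::array::from_fn(|i| i as f64);
/// let v = unsafe {
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     _mm512_i32gather_pd::<8>(idx, data.as_ptr() as *const u8)
/// };
/// // v holds data[0], data[2], ..., data[14].
/// ```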
15360#[inline]
15361#[target_feature(enable = "avx512f")]
15362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15363#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
15364#[rustc_legacy_const_generics(2)]
15365pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(offsets: __m256i, slice: *const u8) -> __m512d {
15366    static_assert_imm8_scale!(SCALE);
15367    let zero = f64x8::ZERO;
15368    let neg_one = -1;
15369    let slice = slice as *const i8;
15370    let offsets = offsets.as_i32x8();
15371    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
15372    transmute(r)
15373}
15374
15375/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15376///
15377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
15378#[inline]
15379#[target_feature(enable = "avx512f")]
15380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15381#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
15382#[rustc_legacy_const_generics(4)]
15383pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
15384    src: __m512d,
15385    mask: __mmask8,
15386    offsets: __m256i,
15387    slice: *const u8,
15388) -> __m512d {
15389    static_assert_imm8_scale!(SCALE);
15390    let src = src.as_f64x8();
15391    let slice = slice as *const i8;
15392    let offsets = offsets.as_i32x8();
15393    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
15394    transmute(r)
15395}
15396
15397/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15398///
15399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
15400#[inline]
15401#[target_feature(enable = "avx512f")]
15402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15403#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
15404#[rustc_legacy_const_generics(2)]
15405pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512d {
15406    static_assert_imm8_scale!(SCALE);
15407    let zero = f64x8::ZERO;
15408    let neg_one = -1;
15409    let slice = slice as *const i8;
15410    let offsets = offsets.as_i64x8();
15411    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
15412    transmute(r)
15413}
15414
15415/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15416///
15417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
15418#[inline]
15419#[target_feature(enable = "avx512f")]
15420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15421#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
15422#[rustc_legacy_const_generics(4)]
15423pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
15424    src: __m512d,
15425    mask: __mmask8,
15426    offsets: __m512i,
15427    slice: *const u8,
15428) -> __m512d {
15429    static_assert_imm8_scale!(SCALE);
15430    let src = src.as_f64x8();
15431    let slice = slice as *const i8;
15432    let offsets = offsets.as_i64x8();
15433    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
15434    transmute(r)
15435}
15436
15437/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15438///
15439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
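///
/// A minimal usage sketch (hypothetical buffer; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). Eight 64-bit
/// indices gather eight `f32` values into a 256-bit result (`SCALE = 4`):
///
/// ```ignore
/// let data: [f32; 8] = core::array::from_fn(|i| i as f32);
/// let v = unsafe {
///     let idx = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     _mm512_i64gather_ps::<4>(idx, data.as_ptr() as *const u8)
/// };
/// // v holds data[0] through data[7].
/// ```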
15440#[inline]
15441#[target_feature(enable = "avx512f")]
15442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15443#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
15444#[rustc_legacy_const_generics(2)]
15445pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m256 {
15446    static_assert_imm8_scale!(SCALE);
15447    let zero = f32x8::ZERO;
15448    let neg_one = -1;
15449    let slice = slice as *const i8;
15450    let offsets = offsets.as_i64x8();
15451    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
15452    transmute(r)
15453}
15454
15455/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15456///
15457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
15458#[inline]
15459#[target_feature(enable = "avx512f")]
15460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15461#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
15462#[rustc_legacy_const_generics(4)]
15463pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
15464    src: __m256,
15465    mask: __mmask8,
15466    offsets: __m512i,
15467    slice: *const u8,
15468) -> __m256 {
15469    static_assert_imm8_scale!(SCALE);
15470    let src = src.as_f32x8();
15471    let slice = slice as *const i8;
15472    let offsets = offsets.as_i64x8();
15473    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
15474    transmute(r)
15475}
15476
15477/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15478///
15479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
15480#[inline]
15481#[target_feature(enable = "avx512f")]
15482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15483#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
15484#[rustc_legacy_const_generics(2)]
15485pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512 {
15486    static_assert_imm8_scale!(SCALE);
15487    let zero = f32x16::ZERO;
15488    let neg_one = -1;
15489    let slice = slice as *const i8;
15490    let offsets = offsets.as_i32x16();
15491    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
15492    transmute(r)
15493}
15494
15495/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15496///
15497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
15498#[inline]
15499#[target_feature(enable = "avx512f")]
15500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15501#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
15502#[rustc_legacy_const_generics(4)]
15503pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
15504    src: __m512,
15505    mask: __mmask16,
15506    offsets: __m512i,
15507    slice: *const u8,
15508) -> __m512 {
15509    static_assert_imm8_scale!(SCALE);
15510    let src = src.as_f32x16();
15511    let slice = slice as *const i8;
15512    let offsets = offsets.as_i32x16();
15513    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
15514    transmute(r)
15515}
15516
15517/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15518///
15519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
15520#[inline]
15521#[target_feature(enable = "avx512f")]
15522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15523#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
15524#[rustc_legacy_const_generics(2)]
15525pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
15526    offsets: __m512i,
15527    slice: *const u8,
15528) -> __m512i {
15529    static_assert_imm8_scale!(SCALE);
15530    let zero = i32x16::ZERO;
15531    let neg_one = -1;
15532    let slice = slice as *const i8;
15533    let offsets = offsets.as_i32x16();
15534    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
15535    transmute(r)
15536}
15537
15538/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15539///
15540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
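///
/// A minimal usage sketch (hypothetical buffer; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). Lanes whose
/// mask bit is clear keep the corresponding element of `src`:
///
/// ```ignore
/// let data: [i32; 16] = core::array::from_fn(|i| i as i32 * 10);
/// let r = unsafe {
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let src = _mm512_set1_epi32(-1);
///     // Mask 0x00FF: gather the low eight lanes, keep -1 in the high eight.
///     _mm512_mask_i32gather_epi32::<4>(src, 0x00FF, idx, data.as_ptr() as *const u8)
/// };
/// ```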
15541#[inline]
15542#[target_feature(enable = "avx512f")]
15543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15544#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
15545#[rustc_legacy_const_generics(4)]
15546pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
15547    src: __m512i,
15548    mask: __mmask16,
15549    offsets: __m512i,
15550    slice: *const u8,
15551) -> __m512i {
15552    static_assert_imm8_scale!(SCALE);
15553    let src = src.as_i32x16();
15554    let mask = mask as i16;
15555    let slice = slice as *const i8;
15556    let offsets = offsets.as_i32x16();
15557    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
15558    transmute(r)
15559}
15560
15561/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15562///
15563/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
15564#[inline]
15565#[target_feature(enable = "avx512f")]
15566#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15567#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
15568#[rustc_legacy_const_generics(2)]
15569pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
15570    offsets: __m256i,
15571    slice: *const u8,
15572) -> __m512i {
15573    static_assert_imm8_scale!(SCALE);
15574    let zero = i64x8::ZERO;
15575    let neg_one = -1;
15576    let slice = slice as *const i8;
15577    let offsets = offsets.as_i32x8();
15578    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
15579    transmute(r)
15580}
15581
15582/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15583///
15584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
15585#[inline]
15586#[target_feature(enable = "avx512f")]
15587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15588#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
15589#[rustc_legacy_const_generics(4)]
15590pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
15591    src: __m512i,
15592    mask: __mmask8,
15593    offsets: __m256i,
15594    slice: *const u8,
15595) -> __m512i {
15596    static_assert_imm8_scale!(SCALE);
15597    let src = src.as_i64x8();
15598    let mask = mask as i8;
15599    let slice = slice as *const i8;
15600    let offsets = offsets.as_i32x8();
15601    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
15602    transmute(r)
15603}
15604
15605/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15606///
15607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
15608#[inline]
15609#[target_feature(enable = "avx512f")]
15610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15611#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
15612#[rustc_legacy_const_generics(2)]
15613pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
15614    offsets: __m512i,
15615    slice: *const u8,
15616) -> __m512i {
15617    static_assert_imm8_scale!(SCALE);
15618    let zero = i64x8::ZERO;
15619    let neg_one = -1;
15620    let slice = slice as *const i8;
15621    let offsets = offsets.as_i64x8();
15622    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
15623    transmute(r)
15624}
15625
15626/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15627///
15628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
15629#[inline]
15630#[target_feature(enable = "avx512f")]
15631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15632#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
15633#[rustc_legacy_const_generics(4)]
15634pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
15635    src: __m512i,
15636    mask: __mmask8,
15637    offsets: __m512i,
15638    slice: *const u8,
15639) -> __m512i {
15640    static_assert_imm8_scale!(SCALE);
15641    let src = src.as_i64x8();
15642    let mask = mask as i8;
15643    let slice = slice as *const i8;
15644    let offsets = offsets.as_i64x8();
15645    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
15646    transmute(r)
15647}
15648
15649/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
15650///
15651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
15652#[inline]
15653#[target_feature(enable = "avx512f")]
15654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15655#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
15656#[rustc_legacy_const_generics(2)]
15657pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
15658    offsets: __m512i,
15659    slice: *const u8,
15660) -> __m256i {
15661    static_assert_imm8_scale!(SCALE);
15662    let zeros = i32x8::ZERO;
15663    let neg_one = -1;
15664    let slice = slice as *const i8;
15665    let offsets = offsets.as_i64x8();
15666    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
15667    transmute(r)
15668}
15669
15670/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15671///
15672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
15673#[inline]
15674#[target_feature(enable = "avx512f")]
15675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15676#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
15677#[rustc_legacy_const_generics(4)]
15678pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
15679    src: __m256i,
15680    mask: __mmask8,
15681    offsets: __m512i,
15682    slice: *const u8,
15683) -> __m256i {
15684    static_assert_imm8_scale!(SCALE);
15685    let src = src.as_i32x8();
15686    let mask = mask as i8;
15687    let slice = slice as *const i8;
15688    let offsets = offsets.as_i64x8();
15689    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
15690    transmute(r)
15691}
15692
15693/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15694///
15695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
15696#[inline]
15697#[target_feature(enable = "avx512f")]
15698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15699#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
15700#[rustc_legacy_const_generics(3)]
15701pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
15702    slice: *mut u8,
15703    offsets: __m256i,
15704    src: __m512d,
15705) {
15706    static_assert_imm8_scale!(SCALE);
15707    let src = src.as_f64x8();
15708    let neg_one = -1;
15709    let slice = slice as *mut i8;
15710    let offsets = offsets.as_i32x8();
15711    vscatterdpd(slice, neg_one, offsets, src, SCALE);
15712}
15713
15714/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15715///
15716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
15717#[inline]
15718#[target_feature(enable = "avx512f")]
15719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15720#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
15721#[rustc_legacy_const_generics(4)]
15722pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
15723    slice: *mut u8,
15724    mask: __mmask8,
15725    offsets: __m256i,
15726    src: __m512d,
15727) {
15728    static_assert_imm8_scale!(SCALE);
15729    let src = src.as_f64x8();
15730    let slice = slice as *mut i8;
15731    let offsets = offsets.as_i32x8();
15732    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
15733}
15734
15735/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15736///
15737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
15738#[inline]
15739#[target_feature(enable = "avx512f")]
15740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15741#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
15742#[rustc_legacy_const_generics(3)]
15743pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
15744    slice: *mut u8,
15745    offsets: __m512i,
15746    src: __m512d,
15747) {
15748    static_assert_imm8_scale!(SCALE);
15749    let src = src.as_f64x8();
15750    let neg_one = -1;
15751    let slice = slice as *mut i8;
15752    let offsets = offsets.as_i64x8();
15753    vscatterqpd(slice, neg_one, offsets, src, SCALE);
15754}
15755
15756/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15757///
15758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
15759#[inline]
15760#[target_feature(enable = "avx512f")]
15761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15762#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
15763#[rustc_legacy_const_generics(4)]
15764pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
15765    slice: *mut u8,
15766    mask: __mmask8,
15767    offsets: __m512i,
15768    src: __m512d,
15769) {
15770    static_assert_imm8_scale!(SCALE);
15771    let src = src.as_f64x8();
15772    let slice = slice as *mut i8;
15773    let offsets = offsets.as_i64x8();
15774    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
15775}
15776
15777/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15778///
15779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
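///
/// A minimal usage sketch (hypothetical buffer; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). With
/// `SCALE = 4`, each 32-bit index addresses one `f32` slot:
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let vals = _mm512_set1_ps(1.5);
///     _mm512_i32scatter_ps::<4>(out.as_mut_ptr() as *mut u8, idx, vals);
/// }
/// // Every element of `out` is now 1.5.
/// ```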
15780#[inline]
15781#[target_feature(enable = "avx512f")]
15782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15783#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
15784#[rustc_legacy_const_generics(3)]
15785pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
15786    slice: *mut u8,
15787    offsets: __m512i,
15788    src: __m512,
15789) {
15790    static_assert_imm8_scale!(SCALE);
15791    let src = src.as_f32x16();
15792    let neg_one = -1;
15793    let slice = slice as *mut i8;
15794    let offsets = offsets.as_i32x16();
15795    vscatterdps(slice, neg_one, offsets, src, SCALE);
15796}
15797
15798/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15799///
15800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
15801#[inline]
15802#[target_feature(enable = "avx512f")]
15803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15804#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
15805#[rustc_legacy_const_generics(4)]
15806pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
15807    slice: *mut u8,
15808    mask: __mmask16,
15809    offsets: __m512i,
15810    src: __m512,
15811) {
15812    static_assert_imm8_scale!(SCALE);
15813    let src = src.as_f32x16();
15814    let slice = slice as *mut i8;
15815    let offsets = offsets.as_i32x16();
15816    vscatterdps(slice, mask as i16, offsets, src, SCALE);
15817}
15818
15819/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15820///
15821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
15822#[inline]
15823#[target_feature(enable = "avx512f")]
15824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15825#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
15826#[rustc_legacy_const_generics(3)]
15827pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
15828    slice: *mut u8,
15829    offsets: __m512i,
15830    src: __m256,
15831) {
15832    static_assert_imm8_scale!(SCALE);
15833    let src = src.as_f32x8();
15834    let neg_one = -1;
15835    let slice = slice as *mut i8;
15836    let offsets = offsets.as_i64x8();
15837    vscatterqps(slice, neg_one, offsets, src, SCALE);
15838}
15839
15840/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15841///
15842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
15843#[inline]
15844#[target_feature(enable = "avx512f")]
15845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15846#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
15847#[rustc_legacy_const_generics(4)]
15848pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
15849    slice: *mut u8,
15850    mask: __mmask8,
15851    offsets: __m512i,
15852    src: __m256,
15853) {
15854    static_assert_imm8_scale!(SCALE);
15855    let src = src.as_f32x8();
15856    let slice = slice as *mut i8;
15857    let offsets = offsets.as_i64x8();
15858    vscatterqps(slice, mask as i8, offsets, src, SCALE);
15859}
15860
15861/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15862///
15863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
15864#[inline]
15865#[target_feature(enable = "avx512f")]
15866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15867#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
15868#[rustc_legacy_const_generics(3)]
15869pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
15870    slice: *mut u8,
15871    offsets: __m256i,
15872    src: __m512i,
15873) {
15874    static_assert_imm8_scale!(SCALE);
15875    let src = src.as_i64x8();
15876    let neg_one = -1;
15877    let slice = slice as *mut i8;
15878    let offsets = offsets.as_i32x8();
15879    vpscatterdq(slice, neg_one, offsets, src, SCALE);
15880}
15881
15882/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15883///
15884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
15885#[inline]
15886#[target_feature(enable = "avx512f")]
15887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15888#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
15889#[rustc_legacy_const_generics(4)]
15890pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
15891    slice: *mut u8,
15892    mask: __mmask8,
15893    offsets: __m256i,
15894    src: __m512i,
15895) {
15896    static_assert_imm8_scale!(SCALE);
15897    let src = src.as_i64x8();
15898    let mask = mask as i8;
15899    let slice = slice as *mut i8;
15900    let offsets = offsets.as_i32x8();
15901    vpscatterdq(slice, mask, offsets, src, SCALE);
15902}
15903
15904/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15905///
15906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
15907#[inline]
15908#[target_feature(enable = "avx512f")]
15909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15910#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
15911#[rustc_legacy_const_generics(3)]
15912pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
15913    slice: *mut u8,
15914    offsets: __m512i,
15915    src: __m512i,
15916) {
15917    static_assert_imm8_scale!(SCALE);
15918    let src = src.as_i64x8();
15919    let neg_one = -1;
15920    let slice = slice as *mut i8;
15921    let offsets = offsets.as_i64x8();
15922    vpscatterqq(slice, neg_one, offsets, src, SCALE);
15923}
15924
15925/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15926///
15927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
15928#[inline]
15929#[target_feature(enable = "avx512f")]
15930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15931#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
15932#[rustc_legacy_const_generics(4)]
15933pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
15934    slice: *mut u8,
15935    mask: __mmask8,
15936    offsets: __m512i,
15937    src: __m512i,
15938) {
15939    static_assert_imm8_scale!(SCALE);
15940    let src = src.as_i64x8();
15941    let mask = mask as i8;
15942    let slice = slice as *mut i8;
15943    let offsets = offsets.as_i64x8();
15944    vpscatterqq(slice, mask, offsets, src, SCALE);
15945}
15946
15947/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15948///
15949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
15950#[inline]
15951#[target_feature(enable = "avx512f")]
15952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15953#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
15954#[rustc_legacy_const_generics(3)]
15955pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
15956    slice: *mut u8,
15957    offsets: __m512i,
15958    src: __m512i,
15959) {
15960    static_assert_imm8_scale!(SCALE);
15961    let src = src.as_i32x16();
15962    let neg_one = -1;
15963    let slice = slice as *mut i8;
15964    let offsets = offsets.as_i32x16();
15965    vpscatterdd(slice, neg_one, offsets, src, SCALE);
15966}
15967
15968/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
15969///
15970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
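///
/// A minimal usage sketch (hypothetical buffer; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). Lanes whose
/// mask bit is clear are not written to memory:
///
/// ```ignore
/// let mut out = [0i32; 16];
/// unsafe {
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let vals = _mm512_set1_epi32(7);
///     // Mask 0x0001: only lane 0 is stored, so out[0] becomes 7 and the rest stay 0.
///     _mm512_mask_i32scatter_epi32::<4>(out.as_mut_ptr() as *mut u8, 0x0001, idx, vals);
/// }
/// ```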
15971#[inline]
15972#[target_feature(enable = "avx512f")]
15973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15974#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
15975#[rustc_legacy_const_generics(4)]
15976pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
15977    slice: *mut u8,
15978    mask: __mmask16,
15979    offsets: __m512i,
15980    src: __m512i,
15981) {
15982    static_assert_imm8_scale!(SCALE);
15983    let src = src.as_i32x16();
15984    let mask = mask as i16;
15985    let slice = slice as *mut i8;
15986    let offsets = offsets.as_i32x16();
15987    vpscatterdd(slice, mask, offsets, src, SCALE);
15988}
15989
15990/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
15991///
15992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
15993#[inline]
15994#[target_feature(enable = "avx512f")]
15995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15996#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
15997#[rustc_legacy_const_generics(3)]
15998pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
15999    slice: *mut u8,
16000    offsets: __m512i,
16001    src: __m256i,
16002) {
16003    static_assert_imm8_scale!(SCALE);
16004    let src = src.as_i32x8();
16005    let neg_one = -1;
16006    let slice = slice as *mut i8;
16007    let offsets = offsets.as_i64x8();
16008    vpscatterqd(slice, neg_one, offsets, src, SCALE);
16009}
16010
16011/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16012///
16013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
16014#[inline]
16015#[target_feature(enable = "avx512f")]
16016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16017#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
16018#[rustc_legacy_const_generics(4)]
16019pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
16020    slice: *mut u8,
16021    mask: __mmask8,
16022    offsets: __m512i,
16023    src: __m256i,
16024) {
16025    static_assert_imm8_scale!(SCALE);
16026    let src = src.as_i32x8();
16027    let mask = mask as i8;
16028    let slice = slice as *mut i8;
16029    let offsets = offsets.as_i64x8();
16030    vpscatterqd(slice, mask, offsets, src, SCALE);
16031}
16032
16033/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
16034/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
16035///
16036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
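///
/// A minimal usage sketch (hypothetical buffer; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f`). Only the low
/// eight 32-bit indices of `vindex` are used (`SCALE = 8` for `i64` elements):
///
/// ```ignore
/// let data: [i64; 8] = core::array::from_fn(|i| i as i64);
/// let v = unsafe {
///     // The upper 256 bits of the index vector are ignored.
///     let idx = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///     _mm512_i32logather_epi64::<8>(idx, data.as_ptr() as *const u8)
/// };
/// // v holds data[7], data[6], ..., data[0].
/// ```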
16037#[inline]
16038#[target_feature(enable = "avx512f")]
16039#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16040#[rustc_legacy_const_generics(2)]
16041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16042pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
16043    vindex: __m512i,
16044    base_addr: *const u8,
16045) -> __m512i {
16046    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
16047}
16048
16049/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
16050/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
16051/// (elements are copied from src when the corresponding mask bit is not set).
16052///
16053/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
16054#[inline]
16055#[target_feature(enable = "avx512f")]
16056#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16057#[rustc_legacy_const_generics(4)]
16058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16059pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
16060    src: __m512i,
16061    k: __mmask8,
16062    vindex: __m512i,
16063    base_addr: *const u8,
16064) -> __m512i {
16065    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
16066}
16067
16068/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
16069/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
16070///
16071/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
16072#[inline]
16073#[target_feature(enable = "avx512f")]
16074#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16075#[rustc_legacy_const_generics(2)]
16076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16077pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
16078    vindex: __m512i,
16079    base_addr: *const u8,
16080) -> __m512d {
16081    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
16082}
16083
16084/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
16085/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
16086/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
16087///
16088/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
16089#[inline]
16090#[target_feature(enable = "avx512f")]
16091#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16092#[rustc_legacy_const_generics(4)]
16093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16094pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
16095    src: __m512d,
16096    k: __mmask8,
16097    vindex: __m512i,
16098    base_addr: *const u8,
16099) -> __m512d {
16100    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
16101}
16102
16103/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16104/// indices stored in the lower half of vindex scaled by scale.
16105///
16106/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
16107#[inline]
16108#[target_feature(enable = "avx512f")]
16109#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16110#[rustc_legacy_const_generics(3)]
16111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16112pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
16113    base_addr: *mut u8,
16114    vindex: __m512i,
16115    a: __m512i,
16116) {
16117    _mm512_i32scatter_epi64::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
16118}
16119
16120/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16121/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
16122/// mask bit is not set are not written to memory).
16123///
16124/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
16125#[inline]
16126#[target_feature(enable = "avx512f")]
16127#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16128#[rustc_legacy_const_generics(4)]
16129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16130pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
16131    base_addr: *mut u8,
16132    k: __mmask8,
16133    vindex: __m512i,
16134    a: __m512i,
16135) {
16136    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
16137}
16138
16139/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16140/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
16141///
16142/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
16143#[inline]
16144#[target_feature(enable = "avx512f")]
16145#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16146#[rustc_legacy_const_generics(3)]
16147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16148pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
16149    base_addr: *mut u8,
16150    vindex: __m512i,
16151    a: __m512d,
16152) {
16153    _mm512_i32scatter_pd::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
16154}
16155
16156/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16157/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
16158/// (elements whose corresponding mask bit is not set are not written to memory).
16159///
16160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
16161#[inline]
16162#[target_feature(enable = "avx512f")]
16163#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16164#[rustc_legacy_const_generics(4)]
16165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16166pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
16167    base_addr: *mut u8,
16168    k: __mmask8,
16169    vindex: __m512i,
16170    a: __m512d,
16171) {
16172    _mm512_mask_i32scatter_pd::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
16173}
16174
16175/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16176/// indices stored in vindex scaled by scale.
16177///
16178/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
16179#[inline]
16180#[target_feature(enable = "avx512f,avx512vl")]
16181#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
16182#[rustc_legacy_const_generics(3)]
16183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16184pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
16185    base_addr: *mut u8,
16186    vindex: __m256i,
16187    a: __m256i,
16188) {
16189    static_assert_imm8_scale!(SCALE);
16190    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
16191}
16192
16193/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16194/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16195/// are not written to memory).
16196///
16197/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
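///
/// A minimal usage sketch (hypothetical buffer; assumes the nightly
/// `stdarch_x86_avx512` feature and a CPU supporting `avx512f` and `avx512vl`).
/// Lanes whose mask bit is clear are not written to memory:
///
/// ```ignore
/// let mut out = [0i32; 8];
/// unsafe {
///     let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///     let vals = _mm256_set1_epi32(9);
///     // Mask 0x0F: only out[0..4] are written.
///     _mm256_mask_i32scatter_epi32::<4>(out.as_mut_ptr() as *mut u8, 0x0F, idx, vals);
/// }
/// ```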
16198#[inline]
16199#[target_feature(enable = "avx512f,avx512vl")]
16200#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
16201#[rustc_legacy_const_generics(4)]
16202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16203pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
16204    base_addr: *mut u8,
16205    k: __mmask8,
16206    vindex: __m256i,
16207    a: __m256i,
16208) {
16209    static_assert_imm8_scale!(SCALE);
16210    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
16211}
16212
16213/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16214///
16215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
16216#[inline]
16217#[target_feature(enable = "avx512f,avx512vl")]
16218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16219#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16220#[rustc_legacy_const_generics(3)]
16221pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
16222    slice: *mut u8,
16223    offsets: __m128i,
16224    src: __m256i,
16225) {
16226    static_assert_imm8_scale!(SCALE);
16227    let src = src.as_i64x4();
16228    let slice = slice as *mut i8;
16229    let offsets = offsets.as_i32x4();
16230    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
16231}
16232
16233/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16234/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16235/// are not written to memory).
16236///
16237/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
16238#[inline]
16239#[target_feature(enable = "avx512f,avx512vl")]
16240#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16241#[rustc_legacy_const_generics(4)]
16242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16243pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
16244    base_addr: *mut u8,
16245    k: __mmask8,
16246    vindex: __m128i,
16247    a: __m256i,
16248) {
16249    static_assert_imm8_scale!(SCALE);
16250    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
16251}
16252
16253/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16254/// at packed 32-bit integer indices stored in vindex scaled by scale.
16255///
16256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
16257#[inline]
16258#[target_feature(enable = "avx512f,avx512vl")]
16259#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16260#[rustc_legacy_const_generics(3)]
16261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16262pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
16263    base_addr: *mut u8,
16264    vindex: __m128i,
16265    a: __m256d,
16266) {
16267    static_assert_imm8_scale!(SCALE);
16268    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
16269}
16270
16271/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16272/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
16273/// mask bit is not set are not written to memory).
16274///
16275/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
16276#[inline]
16277#[target_feature(enable = "avx512f,avx512vl")]
16278#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16279#[rustc_legacy_const_generics(4)]
16280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16281pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
16282    base_addr: *mut u8,
16283    k: __mmask8,
16284    vindex: __m128i,
16285    a: __m256d,
16286) {
16287    static_assert_imm8_scale!(SCALE);
16288    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
16289}
16290
16291/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16292/// at packed 32-bit integer indices stored in vindex scaled by scale.
16293///
16294/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
16295#[inline]
16296#[target_feature(enable = "avx512f,avx512vl")]
16297#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16298#[rustc_legacy_const_generics(3)]
16299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16300pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
16301    base_addr: *mut u8,
16302    vindex: __m256i,
16303    a: __m256,
16304) {
16305    static_assert_imm8_scale!(SCALE);
16306    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
16307}
16308
16309/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16310/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
16311/// mask bit is not set are not written to memory).
16312///
16313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
16314#[inline]
16315#[target_feature(enable = "avx512f,avx512vl")]
16316#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16317#[rustc_legacy_const_generics(4)]
16318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16319pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
16320    base_addr: *mut u8,
16321    k: __mmask8,
16322    vindex: __m256i,
16323    a: __m256,
16324) {
16325    static_assert_imm8_scale!(SCALE);
16326    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
16327}
16328
16329/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16330/// indices stored in vindex scaled by scale.
16331///
16332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
16333#[inline]
16334#[target_feature(enable = "avx512f,avx512vl")]
16335#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
16336#[rustc_legacy_const_generics(3)]
16337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16338pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
16339    base_addr: *mut u8,
16340    vindex: __m256i,
16341    a: __m128i,
16342) {
16343    static_assert_imm8_scale!(SCALE);
16344    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
16345}
16346
16347/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16348/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16349/// are not written to memory).
16350///
16351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
16352#[inline]
16353#[target_feature(enable = "avx512f,avx512vl")]
16354#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
16355#[rustc_legacy_const_generics(4)]
16356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16357pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
16358    base_addr: *mut u8,
16359    k: __mmask8,
16360    vindex: __m256i,
16361    a: __m128i,
16362) {
16363    static_assert_imm8_scale!(SCALE);
16364    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
16365}
16366
16367/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16368/// indices stored in vindex scaled by scale.
16369///
16370/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
16371#[inline]
16372#[target_feature(enable = "avx512f,avx512vl")]
16373#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
16374#[rustc_legacy_const_generics(3)]
16375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16376pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
16377    base_addr: *mut u8,
16378    vindex: __m256i,
16379    a: __m256i,
16380) {
16381    static_assert_imm8_scale!(SCALE);
16382    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
16383}
16384
16385/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16386/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16387/// are not written to memory).
16388///
16389/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
16390#[inline]
16391#[target_feature(enable = "avx512f,avx512vl")]
16392#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
16393#[rustc_legacy_const_generics(4)]
16394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16395pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
16396    base_addr: *mut u8,
16397    k: __mmask8,
16398    vindex: __m256i,
16399    a: __m256i,
16400) {
16401    static_assert_imm8_scale!(SCALE);
16402    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
16403}
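
// A minimal usage sketch of `_mm256_mask_i64scatter_epi64` (illustrative helper, not an Intel
// intrinsic): mask 0b1001 stores lanes 0 and 3 of `vals` at element indices 0 and 6 of `buf`.
// The 64-bit indices can reach offsets beyond the 32-bit range; here SCALE = 8 scales them to
// byte offsets of whole i64 values, and all indices are assumed in bounds.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mask_i64scatter_epi64_sketch(buf: &mut [i64; 8]) {
    unsafe {
        let idx = _mm256_set_epi64x(6, 4, 2, 0);
        let vals = _mm256_set_epi64x(40, 30, 20, 10);
        _mm256_mask_i64scatter_epi64::<8>(buf.as_mut_ptr() as *mut u8, 0b1001, idx, vals);
    }
}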
16404
16405/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16406/// at packed 64-bit integer indices stored in vindex scaled by scale.
16407///
16408/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
16409#[inline]
16410#[target_feature(enable = "avx512f,avx512vl")]
16411#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16412#[rustc_legacy_const_generics(3)]
16413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16414pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
16415    base_addr: *mut u8,
16416    vindex: __m256i,
16417    a: __m256d,
16418) {
16419    static_assert_imm8_scale!(SCALE);
16420    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
16421}
16422
16423/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16424/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
16425/// mask bit is not set are not written to memory).
16426///
16427/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
16428#[inline]
16429#[target_feature(enable = "avx512f,avx512vl")]
16430#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16431#[rustc_legacy_const_generics(4)]
16432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16433pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
16434    base_addr: *mut u8,
16435    k: __mmask8,
16436    vindex: __m256i,
16437    a: __m256d,
16438) {
16439    static_assert_imm8_scale!(SCALE);
16440    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
16441}
16442
16443/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16444/// at packed 64-bit integer indices stored in vindex scaled by scale.
16445///
16446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
16447#[inline]
16448#[target_feature(enable = "avx512f,avx512vl")]
16449#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16450#[rustc_legacy_const_generics(3)]
16451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16452pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
16453    base_addr: *mut u8,
16454    vindex: __m256i,
16455    a: __m128,
16456) {
16457    static_assert_imm8_scale!(SCALE);
16458    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
16459}
16460
16461/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16462/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
16463/// mask bit is not set are not written to memory).
16464///
16465/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
16466#[inline]
16467#[target_feature(enable = "avx512f,avx512vl")]
16468#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16469#[rustc_legacy_const_generics(4)]
16470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16471pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
16472    base_addr: *mut u8,
16473    k: __mmask8,
16474    vindex: __m256i,
16475    a: __m128,
16476) {
16477    static_assert_imm8_scale!(SCALE);
16478    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
16479}
16480
16481/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
16482/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
16483/// mask bit is not set).
16484///
16485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
16486#[inline]
16487#[target_feature(enable = "avx512f,avx512vl")]
16488#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16489#[rustc_legacy_const_generics(4)]
16490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16491pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
16492    src: __m256i,
16493    k: __mmask8,
16494    vindex: __m256i,
16495    base_addr: *const u8,
16496) -> __m256i {
16497    static_assert_imm8_scale!(SCALE);
16498    transmute(vpgatherdd_256(
16499        src.as_i32x8(),
16500        base_addr as _,
16501        vindex.as_i32x8(),
16502        k,
16503        SCALE,
16504    ))
16505}
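
// A minimal usage sketch of `_mm256_mmask_i32gather_epi32` (illustrative helper and table
// size): gathers every other i32 from `table`; the two lanes whose mask bit is clear keep the
// -1 fallback from `src`. SCALE = 4 scales the i32 indices to byte offsets, and all indices
// are assumed in bounds.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mmask_i32gather_epi32_sketch(table: &[i32; 16]) -> __m256i {
    unsafe {
        let src = _mm256_set1_epi32(-1);
        let idx = _mm256_set_epi32(14, 12, 10, 8, 6, 4, 2, 0);
        _mm256_mmask_i32gather_epi32::<4>(src, 0b0011_1111, idx, table.as_ptr() as *const u8)
    }
}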
16506
16507/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
16508/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
16509/// mask bit is not set).
16510///
16511/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
16512#[inline]
16513#[target_feature(enable = "avx512f,avx512vl")]
16514#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16515#[rustc_legacy_const_generics(4)]
16516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16517pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
16518    src: __m256i,
16519    k: __mmask8,
16520    vindex: __m128i,
16521    base_addr: *const u8,
16522) -> __m256i {
16523    static_assert_imm8_scale!(SCALE);
16524    transmute(vpgatherdq_256(
16525        src.as_i64x4(),
16526        base_addr as _,
16527        vindex.as_i32x4(),
16528        k,
16529        SCALE,
16530    ))
16531}
16532
16533/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
16534/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
16535/// from src when the corresponding mask bit is not set).
16536///
16537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
16538#[inline]
16539#[target_feature(enable = "avx512f,avx512vl")]
16540#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16541#[rustc_legacy_const_generics(4)]
16542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16543pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
16544    src: __m256d,
16545    k: __mmask8,
16546    vindex: __m128i,
16547    base_addr: *const u8,
16548) -> __m256d {
16549    static_assert_imm8_scale!(SCALE);
16550    transmute(vgatherdpd_256(
16551        src.as_f64x4(),
16552        base_addr as _,
16553        vindex.as_i32x4(),
16554        k,
16555        SCALE,
16556    ))
16557}
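
// A minimal usage sketch of `_mm256_mmask_i32gather_pd` (illustrative helper): only an
// `__m128i` of four 32-bit indices is needed to drive the four f64 loads. Lanes whose mask
// bit is clear would fall back to the zeroed `src`; SCALE = 8 addresses whole f64 elements
// and the indices are assumed in bounds of `table`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mmask_i32gather_pd_sketch(table: &[f64; 16]) -> __m256d {
    unsafe {
        let idx = _mm_set_epi32(9, 6, 3, 0);
        _mm256_mmask_i32gather_pd::<8>(_mm256_setzero_pd(), 0b1111, idx, table.as_ptr() as *const u8)
    }
}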
16558
16559/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
16560/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
16561/// from src when the corresponding mask bit is not set).
16562///
16563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
16564#[inline]
16565#[target_feature(enable = "avx512f,avx512vl")]
16566#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16567#[rustc_legacy_const_generics(4)]
16568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16569pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
16570    src: __m256,
16571    k: __mmask8,
16572    vindex: __m256i,
16573    base_addr: *const u8,
16574) -> __m256 {
16575    static_assert_imm8_scale!(SCALE);
16576    transmute(vgatherdps_256(
16577        src.as_f32x8(),
16578        base_addr as _,
16579        vindex.as_i32x8(),
16580        k,
16581        SCALE,
16582    ))
16583}
16584
16585/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
16586/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
16587/// mask bit is not set).
16588///
16589/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
16590#[inline]
16591#[target_feature(enable = "avx512f,avx512vl")]
16592#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16593#[rustc_legacy_const_generics(4)]
16594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16595pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
16596    src: __m128i,
16597    k: __mmask8,
16598    vindex: __m256i,
16599    base_addr: *const u8,
16600) -> __m128i {
16601    static_assert_imm8_scale!(SCALE);
16602    transmute(vpgatherqd_256(
16603        src.as_i32x4(),
16604        base_addr as _,
16605        vindex.as_i64x4(),
16606        k,
16607        SCALE,
16608    ))
16609}
16610
16611/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
16612/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
16613/// mask bit is not set).
16614///
16615/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
16616#[inline]
16617#[target_feature(enable = "avx512f,avx512vl")]
16618#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16619#[rustc_legacy_const_generics(4)]
16620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16621pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
16622    src: __m256i,
16623    k: __mmask8,
16624    vindex: __m256i,
16625    base_addr: *const u8,
16626) -> __m256i {
16627    static_assert_imm8_scale!(SCALE);
16628    transmute(vpgatherqq_256(
16629        src.as_i64x4(),
16630        base_addr as _,
16631        vindex.as_i64x4(),
16632        k,
16633        SCALE,
16634    ))
16635}
16636
16637/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
16638/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
16639/// from src when the corresponding mask bit is not set).
16640///
16641/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
16642#[inline]
16643#[target_feature(enable = "avx512f,avx512vl")]
16644#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16645#[rustc_legacy_const_generics(4)]
16646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16647pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
16648    src: __m256d,
16649    k: __mmask8,
16650    vindex: __m256i,
16651    base_addr: *const u8,
16652) -> __m256d {
16653    static_assert_imm8_scale!(SCALE);
16654    transmute(vgatherqpd_256(
16655        src.as_f64x4(),
16656        base_addr as _,
16657        vindex.as_i64x4(),
16658        k,
16659        SCALE,
16660    ))
16661}
16662
16663/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
16664/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
16665/// from src when the corresponding mask bit is not set).
16666///
16667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
16668#[inline]
16669#[target_feature(enable = "avx512f,avx512vl")]
16670#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16671#[rustc_legacy_const_generics(4)]
16672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16673pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
16674    src: __m128,
16675    k: __mmask8,
16676    vindex: __m256i,
16677    base_addr: *const u8,
16678) -> __m128 {
16679    static_assert_imm8_scale!(SCALE);
16680    transmute(vgatherqps_256(
16681        src.as_f32x4(),
16682        base_addr as _,
16683        vindex.as_i64x4(),
16684        k,
16685        SCALE,
16686    ))
16687}
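
// A minimal usage sketch of `_mm256_mmask_i64gather_ps` (illustrative helper): four 64-bit
// indices in an `__m256i` select four f32 values, so the result is only an `__m128`.
// SCALE = 4 addresses whole f32 elements; indices are assumed in bounds of `table`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mmask_i64gather_ps_sketch(table: &[f32; 64]) -> __m128 {
    unsafe {
        let idx = _mm256_set_epi64x(48, 32, 16, 0);
        _mm256_mmask_i64gather_ps::<4>(_mm_setzero_ps(), 0b1111, idx, table.as_ptr() as *const u8)
    }
}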
16688
16689/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16690/// indices stored in vindex scaled by scale.
16691///
16692/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
16693#[inline]
16694#[target_feature(enable = "avx512f,avx512vl")]
16695#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
16696#[rustc_legacy_const_generics(3)]
16697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16698pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
16699    base_addr: *mut u8,
16700    vindex: __m128i,
16701    a: __m128i,
16702) {
16703    static_assert_imm8_scale!(SCALE);
16704    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
16705}
16706
16707/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16708/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16709/// are not written to memory).
16710///
16711/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
16712#[inline]
16713#[target_feature(enable = "avx512f,avx512vl")]
16714#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
16715#[rustc_legacy_const_generics(4)]
16716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16717pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
16718    base_addr: *mut u8,
16719    k: __mmask8,
16720    vindex: __m128i,
16721    a: __m128i,
16722) {
16723    static_assert_imm8_scale!(SCALE);
16724    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
16725}
16726
16727/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16728/// indices stored in vindex scaled by scale.
16729///
16730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
16731#[inline]
16732#[target_feature(enable = "avx512f,avx512vl")]
16733#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16734#[rustc_legacy_const_generics(3)]
16735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16736pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
16737    base_addr: *mut u8,
16738    vindex: __m128i,
16739    a: __m128i,
16740) {
16741    static_assert_imm8_scale!(SCALE);
16742    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
16743}
16744
16745/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
16746/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16747/// are not written to memory).
16748///
16749/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
16750#[inline]
16751#[target_feature(enable = "avx512f,avx512vl")]
16752#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
16753#[rustc_legacy_const_generics(4)]
16754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16755pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
16756    base_addr: *mut u8,
16757    k: __mmask8,
16758    vindex: __m128i,
16759    a: __m128i,
16760) {
16761    static_assert_imm8_scale!(SCALE);
16762    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
16763}
16764
16765/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16766/// at packed 32-bit integer indices stored in vindex scaled by scale.
16767///
16768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
16769#[inline]
16770#[target_feature(enable = "avx512f,avx512vl")]
16771#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16772#[rustc_legacy_const_generics(3)]
16773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16774pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
16775    static_assert_imm8_scale!(SCALE);
16776    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
16777}
16778
16779/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16780/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
16781/// mask bit is not set are not written to memory).
16782///
16783/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
16784#[inline]
16785#[target_feature(enable = "avx512f,avx512vl")]
16786#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16787#[rustc_legacy_const_generics(4)]
16788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16789pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
16790    base_addr: *mut u8,
16791    k: __mmask8,
16792    vindex: __m128i,
16793    a: __m128d,
16794) {
16795    static_assert_imm8_scale!(SCALE);
16796    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
16797}
16798
16799/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16800/// at packed 32-bit integer indices stored in vindex scaled by scale.
16801///
16802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
16803#[inline]
16804#[target_feature(enable = "avx512f,avx512vl")]
16805#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16806#[rustc_legacy_const_generics(3)]
16807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16808pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
16809    static_assert_imm8_scale!(SCALE);
16810    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
16811}
16812
16813/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16814/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
16815/// mask bit is not set are not written to memory).
16816///
16817/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
16818#[inline]
16819#[target_feature(enable = "avx512f,avx512vl")]
16820#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16821#[rustc_legacy_const_generics(4)]
16822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16823pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
16824    base_addr: *mut u8,
16825    k: __mmask8,
16826    vindex: __m128i,
16827    a: __m128,
16828) {
16829    static_assert_imm8_scale!(SCALE);
16830    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
16831}
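
// A minimal usage sketch of `_mm_mask_i32scatter_ps` (illustrative helper): the 128-bit form
// touches at most four f32 slots; with mask 0b0110 only lanes 1 and 2 of `vals` are stored,
// to `buf[3]` and `buf[5]`. SCALE = 4 addresses whole f32 elements, in-bounds indices assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mask_i32scatter_ps_sketch(buf: &mut [f32; 8]) {
    unsafe {
        let idx = _mm_set_epi32(7, 5, 3, 1);
        let vals = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
        _mm_mask_i32scatter_ps::<4>(buf.as_mut_ptr() as *mut u8, 0b0110, idx, vals);
    }
}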
16832
16833/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16834/// indices stored in vindex scaled by scale.
16835///
16836/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
16837#[inline]
16838#[target_feature(enable = "avx512f,avx512vl")]
16839#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
16840#[rustc_legacy_const_generics(3)]
16841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16842pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
16843    base_addr: *mut u8,
16844    vindex: __m128i,
16845    a: __m128i,
16846) {
16847    static_assert_imm8_scale!(SCALE);
16848    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
16849}
16850
16851/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16852/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16853/// are not written to memory).
16854///
16855/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
16856#[inline]
16857#[target_feature(enable = "avx512f,avx512vl")]
16858#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
16859#[rustc_legacy_const_generics(4)]
16860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16861pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
16862    base_addr: *mut u8,
16863    k: __mmask8,
16864    vindex: __m128i,
16865    a: __m128i,
16866) {
16867    static_assert_imm8_scale!(SCALE);
16868    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
16869}
16870
16871/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16872/// indices stored in vindex scaled by scale.
16873///
16874/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
16875#[inline]
16876#[target_feature(enable = "avx512f,avx512vl")]
16877#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
16878#[rustc_legacy_const_generics(3)]
16879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16880pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
16881    base_addr: *mut u8,
16882    vindex: __m128i,
16883    a: __m128i,
16884) {
16885    static_assert_imm8_scale!(SCALE);
16886    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
16887}
16888
16889/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
16890/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
16891/// are not written to memory).
16892///
16893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
16894#[inline]
16895#[target_feature(enable = "avx512f,avx512vl")]
16896#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
16897#[rustc_legacy_const_generics(4)]
16898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16899pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
16900    base_addr: *mut u8,
16901    k: __mmask8,
16902    vindex: __m128i,
16903    a: __m128i,
16904) {
16905    static_assert_imm8_scale!(SCALE);
16906    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
16907}
16908
16909/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16910/// at packed 64-bit integer indices stored in vindex scaled by scale.
16911///
16912/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
16913#[inline]
16914#[target_feature(enable = "avx512f,avx512vl")]
16915#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16916#[rustc_legacy_const_generics(3)]
16917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16918pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
16919    static_assert_imm8_scale!(SCALE);
16920    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
16921}
16922
16923/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
16924/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
16925/// mask bit is not set are not written to memory).
16926///
16927/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
16928#[inline]
16929#[target_feature(enable = "avx512f,avx512vl")]
16930#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16931#[rustc_legacy_const_generics(4)]
16932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16933pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
16934    base_addr: *mut u8,
16935    k: __mmask8,
16936    vindex: __m128i,
16937    a: __m128d,
16938) {
16939    static_assert_imm8_scale!(SCALE);
16940    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
16941}
16942
16943/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16944/// at packed 64-bit integer indices stored in vindex scaled by scale.
16945///
16946/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
16947#[inline]
16948#[target_feature(enable = "avx512f,avx512vl")]
16949#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16950#[rustc_legacy_const_generics(3)]
16951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16952pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
16953    static_assert_imm8_scale!(SCALE);
16954    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
16955}
16956
16957/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
16958/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set are not written to memory).
16959///
16960/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
16961#[inline]
16962#[target_feature(enable = "avx512f,avx512vl")]
16963#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16964#[rustc_legacy_const_generics(4)]
16965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16966pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
16967    base_addr: *mut u8,
16968    k: __mmask8,
16969    vindex: __m128i,
16970    a: __m128,
16971) {
16972    static_assert_imm8_scale!(SCALE);
16973    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
16974}
16975
16976/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
16977/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
16978/// mask bit is not set).
16979///
16980/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
16981#[inline]
16982#[target_feature(enable = "avx512f,avx512vl")]
16983#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16984#[rustc_legacy_const_generics(4)]
16985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16986pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
16987    src: __m128i,
16988    k: __mmask8,
16989    vindex: __m128i,
16990    base_addr: *const u8,
16991) -> __m128i {
16992    static_assert_imm8_scale!(SCALE);
16993    transmute(vpgatherdd_128(
16994        src.as_i32x4(),
16995        base_addr as _,
16996        vindex.as_i32x4(),
16997        k,
16998        SCALE,
16999    ))
17000}
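
// A minimal usage sketch of `_mm_mmask_i32gather_epi32` (illustrative helper): gathers
// `table[0]`, `table[2]`, `table[4]` and `table[6]`. With a full mask the zeroed `src` is
// never used, but it must still be supplied. SCALE = 4; in-bounds indices are assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn mmask_i32gather_epi32_sketch_128(table: &[i32; 8]) -> __m128i {
    unsafe {
        let idx = _mm_set_epi32(6, 4, 2, 0);
        _mm_mmask_i32gather_epi32::<4>(_mm_setzero_si128(), 0b1111, idx, table.as_ptr() as *const u8)
    }
}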
17001
17002/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17003/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17004/// mask bit is not set).
17005///
17006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
17007#[inline]
17008#[target_feature(enable = "avx512f,avx512vl")]
17009#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17010#[rustc_legacy_const_generics(4)]
17011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17012pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
17013    src: __m128i,
17014    k: __mmask8,
17015    vindex: __m128i,
17016    base_addr: *const u8,
17017) -> __m128i {
17018    static_assert_imm8_scale!(SCALE);
17019    transmute(vpgatherdq_128(
17020        src.as_i64x2(),
17021        base_addr as _,
17022        vindex.as_i32x4(),
17023        k,
17024        SCALE,
17025    ))
17026}
17027
17028/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17029/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17030/// from src when the corresponding mask bit is not set).
17031///
17032/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
17033#[inline]
17034#[target_feature(enable = "avx512f,avx512vl")]
17035#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17036#[rustc_legacy_const_generics(4)]
17037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17038pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
17039    src: __m128d,
17040    k: __mmask8,
17041    vindex: __m128i,
17042    base_addr: *const u8,
17043) -> __m128d {
17044    static_assert_imm8_scale!(SCALE);
17045    transmute(vgatherdpd_128(
17046        src.as_f64x2(),
17047        base_addr as _,
17048        vindex.as_i32x4(),
17049        k,
17050        SCALE,
17051    ))
17052}
17053
17054/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17055/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17056/// from src when the corresponding mask bit is not set).
17057///
17058/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
17059#[inline]
17060#[target_feature(enable = "avx512f,avx512vl")]
17061#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17062#[rustc_legacy_const_generics(4)]
17063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17064pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
17065    src: __m128,
17066    k: __mmask8,
17067    vindex: __m128i,
17068    base_addr: *const u8,
17069) -> __m128 {
17070    static_assert_imm8_scale!(SCALE);
17071    transmute(vgatherdps_128(
17072        src.as_f32x4(),
17073        base_addr as _,
17074        vindex.as_i32x4(),
17075        k,
17076        SCALE,
17077    ))
17078}
17079
17080/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17081/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17082/// mask bit is not set).
17083///
17084/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
17085#[inline]
17086#[target_feature(enable = "avx512f,avx512vl")]
17087#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17088#[rustc_legacy_const_generics(4)]
17089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17090pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
17091    src: __m128i,
17092    k: __mmask8,
17093    vindex: __m128i,
17094    base_addr: *const u8,
17095) -> __m128i {
17096    static_assert_imm8_scale!(SCALE);
17097    transmute(vpgatherqd_128(
17098        src.as_i32x4(),
17099        base_addr as _,
17100        vindex.as_i64x2(),
17101        k,
17102        SCALE,
17103    ))
17104}
17105
17106/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17107/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17108/// mask bit is not set).
17109///
17110/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
17111#[inline]
17112#[target_feature(enable = "avx512f,avx512vl")]
17113#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17114#[rustc_legacy_const_generics(4)]
17115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17116pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
17117    src: __m128i,
17118    k: __mmask8,
17119    vindex: __m128i,
17120    base_addr: *const u8,
17121) -> __m128i {
17122    static_assert_imm8_scale!(SCALE);
17123    transmute(vpgatherqq_128(
17124        src.as_i64x2(),
17125        base_addr as _,
17126        vindex.as_i64x2(),
17127        k,
17128        SCALE,
17129    ))
17130}
17131
17132/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17133/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17134/// from src when the corresponding mask bit is not set).
17135///
17136/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
17137#[inline]
17138#[target_feature(enable = "avx512f,avx512vl")]
17139#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17140#[rustc_legacy_const_generics(4)]
17141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17142pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
17143    src: __m128d,
17144    k: __mmask8,
17145    vindex: __m128i,
17146    base_addr: *const u8,
17147) -> __m128d {
17148    static_assert_imm8_scale!(SCALE);
17149    transmute(vgatherqpd_128(
17150        src.as_f64x2(),
17151        base_addr as _,
17152        vindex.as_i64x2(),
17153        k,
17154        SCALE,
17155    ))
17156}
17157
17158/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17159/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17160/// from src when the corresponding mask bit is not set).
17161///
17162/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
17163#[inline]
17164#[target_feature(enable = "avx512f,avx512vl")]
17165#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17166#[rustc_legacy_const_generics(4)]
17167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17168pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
17169    src: __m128,
17170    k: __mmask8,
17171    vindex: __m128i,
17172    base_addr: *const u8,
17173) -> __m128 {
17174    static_assert_imm8_scale!(SCALE);
17175    transmute(vgatherqps_128(
17176        src.as_f32x4(),
17177        base_addr as _,
17178        vindex.as_i64x2(),
17179        k,
17180        SCALE,
17181    ))
17182}
17183
17184/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17185///
17186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
17187#[inline]
17188#[target_feature(enable = "avx512f")]
17189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17190#[cfg_attr(test, assert_instr(vpcompressd))]
17191pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
17192    transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k))
17193}
17194
17195/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17196///
17197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
17198#[inline]
17199#[target_feature(enable = "avx512f")]
17200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17201#[cfg_attr(test, assert_instr(vpcompressd))]
17202pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
17203    transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k))
17204}
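
// A minimal usage sketch of `_mm512_maskz_compress_epi32` (illustrative helper): the lanes
// selected by the mask (here the even ones) are packed contiguously into the low lanes of the
// result and the remaining upper lanes are zeroed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_compress_epi32_sketch(a: __m512i) -> __m512i {
    unsafe { _mm512_maskz_compress_epi32(0b0101_0101_0101_0101, a) }
}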
17205
17206/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17207///
17208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
17209#[inline]
17210#[target_feature(enable = "avx512f,avx512vl")]
17211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17212#[cfg_attr(test, assert_instr(vpcompressd))]
17213pub unsafe fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
17214    transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k))
17215}
17216
17217/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17218///
17219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
17220#[inline]
17221#[target_feature(enable = "avx512f,avx512vl")]
17222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17223#[cfg_attr(test, assert_instr(vpcompressd))]
17224pub unsafe fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
17225    transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k))
17226}
17227
17228/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17229///
17230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
17231#[inline]
17232#[target_feature(enable = "avx512f,avx512vl")]
17233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17234#[cfg_attr(test, assert_instr(vpcompressd))]
17235pub unsafe fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
17236    transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k))
17237}
17238
17239/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17240///
17241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
17242#[inline]
17243#[target_feature(enable = "avx512f,avx512vl")]
17244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17245#[cfg_attr(test, assert_instr(vpcompressd))]
17246pub unsafe fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
17247    transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k))
17248}
17249
17250/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17251///
17252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
17253#[inline]
17254#[target_feature(enable = "avx512f")]
17255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17256#[cfg_attr(test, assert_instr(vpcompressq))]
17257pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
17258    transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k))
17259}
17260
17261/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17262///
17263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
17264#[inline]
17265#[target_feature(enable = "avx512f")]
17266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17267#[cfg_attr(test, assert_instr(vpcompressq))]
17268pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
17269    transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k))
17270}
17271
17272/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17273///
17274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
17275#[inline]
17276#[target_feature(enable = "avx512f,avx512vl")]
17277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17278#[cfg_attr(test, assert_instr(vpcompressq))]
17279pub unsafe fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
17280    transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k))
17281}
17282
17283/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17284///
17285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
17286#[inline]
17287#[target_feature(enable = "avx512f,avx512vl")]
17288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17289#[cfg_attr(test, assert_instr(vpcompressq))]
17290pub unsafe fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
17291    transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k))
17292}
17293
17294/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17295///
17296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
17297#[inline]
17298#[target_feature(enable = "avx512f,avx512vl")]
17299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17300#[cfg_attr(test, assert_instr(vpcompressq))]
17301pub unsafe fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
17302    transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k))
17303}
17304
17305/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17306///
17307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
17308#[inline]
17309#[target_feature(enable = "avx512f,avx512vl")]
17310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17311#[cfg_attr(test, assert_instr(vpcompressq))]
17312pub unsafe fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
17313    transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k))
17314}
17315
17316/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17317///
17318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
17319#[inline]
17320#[target_feature(enable = "avx512f")]
17321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17322#[cfg_attr(test, assert_instr(vcompressps))]
17323pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
17324    transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k))
17325}
17326
17327/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17328///
17329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
17330#[inline]
17331#[target_feature(enable = "avx512f")]
17332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17333#[cfg_attr(test, assert_instr(vcompressps))]
17334pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
17335    transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k))
17336}
17337
17338/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17339///
17340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
17341#[inline]
17342#[target_feature(enable = "avx512f,avx512vl")]
17343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17344#[cfg_attr(test, assert_instr(vcompressps))]
17345pub unsafe fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
17346    transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k))
17347}
17348
17349/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17350///
17351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
17352#[inline]
17353#[target_feature(enable = "avx512f,avx512vl")]
17354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17355#[cfg_attr(test, assert_instr(vcompressps))]
17356pub unsafe fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
17357    transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k))
17358}
17359
17360/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17361///
17362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
17363#[inline]
17364#[target_feature(enable = "avx512f,avx512vl")]
17365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17366#[cfg_attr(test, assert_instr(vcompressps))]
17367pub unsafe fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
17368    transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k))
17369}
17370
17371/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17372///
17373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
17374#[inline]
17375#[target_feature(enable = "avx512f,avx512vl")]
17376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17377#[cfg_attr(test, assert_instr(vcompressps))]
17378pub unsafe fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
17379    transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k))
17380}
17381
17382/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17383///
17384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
17385#[inline]
17386#[target_feature(enable = "avx512f")]
17387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17388#[cfg_attr(test, assert_instr(vcompresspd))]
17389pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
17390    transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k))
17391}
17392
17393/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17394///
17395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
17396#[inline]
17397#[target_feature(enable = "avx512f")]
17398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17399#[cfg_attr(test, assert_instr(vcompresspd))]
17400pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
17401    transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k))
17402}
17403
17404/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17405///
17406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
17407#[inline]
17408#[target_feature(enable = "avx512f,avx512vl")]
17409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17410#[cfg_attr(test, assert_instr(vcompresspd))]
17411pub unsafe fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
17412    transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k))
17413}
17414
17415/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17416///
17417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
17418#[inline]
17419#[target_feature(enable = "avx512f,avx512vl")]
17420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17421#[cfg_attr(test, assert_instr(vcompresspd))]
17422pub unsafe fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
17423    transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k))
17424}
17425
17426/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
17427///
17428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
17429#[inline]
17430#[target_feature(enable = "avx512f,avx512vl")]
17431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17432#[cfg_attr(test, assert_instr(vcompresspd))]
17433pub unsafe fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
17434    transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k))
17435}
17436
17437/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17438///
17439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
17440#[inline]
17441#[target_feature(enable = "avx512f,avx512vl")]
17442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17443#[cfg_attr(test, assert_instr(vcompresspd))]
17444pub unsafe fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
17445    transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k))
17446}
17447
17448/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17449///
17450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
17451#[inline]
17452#[target_feature(enable = "avx512f")]
17453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17454#[cfg_attr(test, assert_instr(vpcompressd))]
17455pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
17456    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
17457}
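
// A minimal usage sketch of `_mm512_mask_compressstoreu_epi32` (illustrative helper): appends
// only the selected lanes of `a` to `out`, packed back to back. Exactly `k.count_ones()` i32
// values are written (the store may be unaligned), so `out` must provide at least that much
// space.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_compressstoreu_epi32_sketch(out: &mut [i32], k: __mmask16, a: __m512i) {
    debug_assert!(out.len() >= k.count_ones() as usize);
    unsafe { _mm512_mask_compressstoreu_epi32(out.as_mut_ptr() as *mut u8, k, a) }
}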
17458
17459/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17460///
17461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
17462#[inline]
17463#[target_feature(enable = "avx512f,avx512vl")]
17464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17465#[cfg_attr(test, assert_instr(vpcompressd))]
17466pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
17467    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
17468}
17469
17470/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17471///
17472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
17473#[inline]
17474#[target_feature(enable = "avx512f,avx512vl")]
17475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17476#[cfg_attr(test, assert_instr(vpcompressd))]
17477pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
17478    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
17479}
17480
17481/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17482///
17483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
17484#[inline]
17485#[target_feature(enable = "avx512f")]
17486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17487#[cfg_attr(test, assert_instr(vpcompressq))]
17488pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
17489    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
17490}
17491
17492/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17493///
17494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
17495#[inline]
17496#[target_feature(enable = "avx512f,avx512vl")]
17497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17498#[cfg_attr(test, assert_instr(vpcompressq))]
17499pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
17500    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
17501}
17502
17503/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17504///
17505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
17506#[inline]
17507#[target_feature(enable = "avx512f,avx512vl")]
17508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17509#[cfg_attr(test, assert_instr(vpcompressq))]
17510pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
17511    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
17512}
17513
17514/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17515///
17516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
17517#[inline]
17518#[target_feature(enable = "avx512f")]
17519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17520#[cfg_attr(test, assert_instr(vcompressps))]
17521pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
17522    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
17523}
17524
17525/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17526///
17527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
17528#[inline]
17529#[target_feature(enable = "avx512f,avx512vl")]
17530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17531#[cfg_attr(test, assert_instr(vcompressps))]
17532pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
17533    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
17534}
17535
17536/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17537///
17538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
17539#[inline]
17540#[target_feature(enable = "avx512f,avx512vl")]
17541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17542#[cfg_attr(test, assert_instr(vcompressps))]
17543pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
17544    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
17545}
17546
17547/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17548///
17549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
17550#[inline]
17551#[target_feature(enable = "avx512f")]
17552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17553#[cfg_attr(test, assert_instr(vcompresspd))]
17554pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
17555    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
17556}
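
// Illustrative usage sketch (not part of the upstream source; the helper name
// and values are hypothetical): the double-precision variant behaves the same
// way, packing the selected lanes contiguously at `base_addr`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn compressstoreu_pd_sketch() {
    let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    let mut out = [0.0f64; 8];
    // Only the odd lanes are stored; they land contiguously at the start of `out`.
    _mm512_mask_compressstoreu_pd(out.as_mut_ptr() as *mut u8, 0b1010_1010, a);
    assert_eq!(out, [2.0, 4.0, 6.0, 8.0, 0.0, 0.0, 0.0, 0.0]);
}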
17557
17558/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17559///
17560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
17561#[inline]
17562#[target_feature(enable = "avx512f,avx512vl")]
17563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17564#[cfg_attr(test, assert_instr(vcompresspd))]
17565pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
17566    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
17567}
17568
17569/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
17570///
17571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
17572#[inline]
17573#[target_feature(enable = "avx512f,avx512vl")]
17574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17575#[cfg_attr(test, assert_instr(vcompresspd))]
17576pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
17577    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
17578}
17579
17580/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17581///
17582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
17583#[inline]
17584#[target_feature(enable = "avx512f")]
17585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17586#[cfg_attr(test, assert_instr(vpexpandd))]
17587pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
17588    transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k))
17589}
17590
17591/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17592///
17593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
17594#[inline]
17595#[target_feature(enable = "avx512f")]
17596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17597#[cfg_attr(test, assert_instr(vpexpandd))]
17598pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
17599    transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k))
17600}
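
// Illustrative usage sketch (not part of the upstream source; the helper name
// and mask are hypothetical): expand is the inverse of compress. The lowest
// elements of `a` are read in order and scattered to the lanes whose mask bit
// is set; with the zeromask form the remaining lanes become zero.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_expand_epi32_sketch() {
    let a = _mm512_setr_epi32(10, 20, 30, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    // Lanes 0, 2, 4 and 6 are active: they receive a[0..4] in order.
    let r = _mm512_maskz_expand_epi32(0b0000_0000_0101_0101, a);
    let expected = _mm512_setr_epi32(10, 0, 20, 0, 30, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), u16::MAX);
}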
17601
17602/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17603///
17604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
17605#[inline]
17606#[target_feature(enable = "avx512f,avx512vl")]
17607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17608#[cfg_attr(test, assert_instr(vpexpandd))]
17609pub unsafe fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
17610    transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k))
17611}
17612
17613/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17614///
17615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
17616#[inline]
17617#[target_feature(enable = "avx512f,avx512vl")]
17618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17619#[cfg_attr(test, assert_instr(vpexpandd))]
17620pub unsafe fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
17621    transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k))
17622}
17623
17624/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17625///
17626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
17627#[inline]
17628#[target_feature(enable = "avx512f,avx512vl")]
17629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17630#[cfg_attr(test, assert_instr(vpexpandd))]
17631pub unsafe fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
17632    transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k))
17633}
17634
17635/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17636///
17637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
17638#[inline]
17639#[target_feature(enable = "avx512f,avx512vl")]
17640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17641#[cfg_attr(test, assert_instr(vpexpandd))]
17642pub unsafe fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
17643    transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k))
17644}
17645
17646/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17647///
17648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
17649#[inline]
17650#[target_feature(enable = "avx512f")]
17651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17652#[cfg_attr(test, assert_instr(vpexpandq))]
17653pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
17654    transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k))
17655}
17656
17657/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17658///
17659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
17660#[inline]
17661#[target_feature(enable = "avx512f")]
17662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17663#[cfg_attr(test, assert_instr(vpexpandq))]
17664pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
17665    transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k))
17666}
17667
17668/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17669///
17670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
17671#[inline]
17672#[target_feature(enable = "avx512f,avx512vl")]
17673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17674#[cfg_attr(test, assert_instr(vpexpandq))]
17675pub unsafe fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
17676    transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k))
17677}
17678
17679/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17680///
17681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
17682#[inline]
17683#[target_feature(enable = "avx512f,avx512vl")]
17684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17685#[cfg_attr(test, assert_instr(vpexpandq))]
17686pub unsafe fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
17687    transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k))
17688}
17689
17690/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17691///
17692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
17693#[inline]
17694#[target_feature(enable = "avx512f,avx512vl")]
17695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17696#[cfg_attr(test, assert_instr(vpexpandq))]
17697pub unsafe fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
17698    transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k))
17699}
17700
17701/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17702///
17703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
17704#[inline]
17705#[target_feature(enable = "avx512f,avx512vl")]
17706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17707#[cfg_attr(test, assert_instr(vpexpandq))]
17708pub unsafe fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
17709    transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k))
17710}
17711
17712/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17713///
17714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
17715#[inline]
17716#[target_feature(enable = "avx512f")]
17717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17718#[cfg_attr(test, assert_instr(vexpandps))]
17719pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
17720    transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k))
17721}
17722
17723/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17724///
17725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
17726#[inline]
17727#[target_feature(enable = "avx512f")]
17728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17729#[cfg_attr(test, assert_instr(vexpandps))]
17730pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
17731    transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k))
17732}
17733
17734/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17735///
17736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
17737#[inline]
17738#[target_feature(enable = "avx512f,avx512vl")]
17739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17740#[cfg_attr(test, assert_instr(vexpandps))]
17741pub unsafe fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
17742    transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k))
17743}
17744
17745/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17746///
17747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
17748#[inline]
17749#[target_feature(enable = "avx512f,avx512vl")]
17750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17751#[cfg_attr(test, assert_instr(vexpandps))]
17752pub unsafe fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
17753    transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k))
17754}
17755
17756/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17757///
17758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
17759#[inline]
17760#[target_feature(enable = "avx512f,avx512vl")]
17761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17762#[cfg_attr(test, assert_instr(vexpandps))]
17763pub unsafe fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
17764    transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k))
17765}
17766
17767/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17768///
17769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
17770#[inline]
17771#[target_feature(enable = "avx512f,avx512vl")]
17772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17773#[cfg_attr(test, assert_instr(vexpandps))]
17774pub unsafe fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
17775    transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k))
17776}
17777
17778/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17779///
17780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
17781#[inline]
17782#[target_feature(enable = "avx512f")]
17783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17784#[cfg_attr(test, assert_instr(vexpandpd))]
17785pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
17786    transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k))
17787}
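
// Illustrative usage sketch (not part of the upstream source; the helper name
// and values are hypothetical): with the writemask form, inactive lanes keep
// the corresponding values from `src` instead of being zeroed.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_expand_pd_sketch() {
    let src = _mm512_set1_pd(9.0);
    let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0);
    // Lanes 1 and 3 receive a[0] and a[1]; every other lane keeps 9.0 from `src`.
    let r = _mm512_mask_expand_pd(src, 0b0000_1010, a);
    let expected = _mm512_setr_pd(9.0, 1.0, 9.0, 2.0, 9.0, 9.0, 9.0, 9.0);
    assert_eq!(_mm512_cmpeq_pd_mask(r, expected), 0xff);
}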
17788
17789/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17790///
17791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
17792#[inline]
17793#[target_feature(enable = "avx512f")]
17794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17795#[cfg_attr(test, assert_instr(vexpandpd))]
17796pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
17797    transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k))
17798}
17799
17800/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17801///
17802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
17803#[inline]
17804#[target_feature(enable = "avx512f,avx512vl")]
17805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17806#[cfg_attr(test, assert_instr(vexpandpd))]
17807pub unsafe fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
17808    transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k))
17809}
17810
17811/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17812///
17813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
17814#[inline]
17815#[target_feature(enable = "avx512f,avx512vl")]
17816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17817#[cfg_attr(test, assert_instr(vexpandpd))]
17818pub unsafe fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
17819    transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k))
17820}
17821
17822/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17823///
17824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
17825#[inline]
17826#[target_feature(enable = "avx512f,avx512vl")]
17827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17828#[cfg_attr(test, assert_instr(vexpandpd))]
17829pub unsafe fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
17830    transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k))
17831}
17832
17833/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17834///
17835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
17836#[inline]
17837#[target_feature(enable = "avx512f,avx512vl")]
17838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17839#[cfg_attr(test, assert_instr(vexpandpd))]
17840pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
17841    transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k))
17842}
17843
17844/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
17845///
17846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
17847#[inline]
17848#[target_feature(enable = "avx512f")]
17849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17850#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17851#[rustc_legacy_const_generics(1)]
17852pub unsafe fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
17853    static_assert_uimm_bits!(IMM8, 8);
17854    let a = a.as_i32x16();
17855    let r = vprold(a, IMM8);
17856    transmute(r)
17857}
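
// Illustrative usage sketch (not part of the upstream source; the helper name
// is hypothetical): a left rotation wraps bits shifted out of bit 31 back into
// bit 0, unlike a plain shift which discards them.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn rol_epi32_sketch() {
    let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
    // 0x8000_0001 rotated left by 1 becomes 0x0000_0003 in every lane.
    let r = _mm512_rol_epi32::<1>(a);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(3)), u16::MAX);
}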
17858
17859/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17860///
17861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
17862#[inline]
17863#[target_feature(enable = "avx512f")]
17864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17865#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17866#[rustc_legacy_const_generics(3)]
17867pub unsafe fn _mm512_mask_rol_epi32<const IMM8: i32>(
17868    src: __m512i,
17869    k: __mmask16,
17870    a: __m512i,
17871) -> __m512i {
17872    static_assert_uimm_bits!(IMM8, 8);
17873    let a = a.as_i32x16();
17874    let r = vprold(a, IMM8);
17875    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
17876}
17877
17878/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17879///
17880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
17881#[inline]
17882#[target_feature(enable = "avx512f")]
17883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17884#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17885#[rustc_legacy_const_generics(2)]
17886pub unsafe fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
17887    static_assert_uimm_bits!(IMM8, 8);
17888    let a = a.as_i32x16();
17889    let r = vprold(a, IMM8);
17890    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
17891}
17892
17893/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
17894///
17895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
17896#[inline]
17897#[target_feature(enable = "avx512f,avx512vl")]
17898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17899#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17900#[rustc_legacy_const_generics(1)]
17901pub unsafe fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
17902    static_assert_uimm_bits!(IMM8, 8);
17903    let a = a.as_i32x8();
17904    let r = vprold256(a, IMM8);
17905    transmute(r)
17906}
17907
17908/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17909///
17910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
17911#[inline]
17912#[target_feature(enable = "avx512f,avx512vl")]
17913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17914#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17915#[rustc_legacy_const_generics(3)]
17916pub unsafe fn _mm256_mask_rol_epi32<const IMM8: i32>(
17917    src: __m256i,
17918    k: __mmask8,
17919    a: __m256i,
17920) -> __m256i {
17921    static_assert_uimm_bits!(IMM8, 8);
17922    let a = a.as_i32x8();
17923    let r = vprold256(a, IMM8);
17924    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
17925}
17926
17927/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17928///
17929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
17930#[inline]
17931#[target_feature(enable = "avx512f,avx512vl")]
17932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17933#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17934#[rustc_legacy_const_generics(2)]
17935pub unsafe fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
17936    static_assert_uimm_bits!(IMM8, 8);
17937    let a = a.as_i32x8();
17938    let r = vprold256(a, IMM8);
17939    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
17940}
17941
17942/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
17943///
17944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
17945#[inline]
17946#[target_feature(enable = "avx512f,avx512vl")]
17947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17948#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17949#[rustc_legacy_const_generics(1)]
17950pub unsafe fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
17951    static_assert_uimm_bits!(IMM8, 8);
17952    let a = a.as_i32x4();
17953    let r = vprold128(a, IMM8);
17954    transmute(r)
17955}
17956
17957/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
17958///
17959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
17960#[inline]
17961#[target_feature(enable = "avx512f,avx512vl")]
17962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17963#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17964#[rustc_legacy_const_generics(3)]
17965pub unsafe fn _mm_mask_rol_epi32<const IMM8: i32>(
17966    src: __m128i,
17967    k: __mmask8,
17968    a: __m128i,
17969) -> __m128i {
17970    static_assert_uimm_bits!(IMM8, 8);
17971    let a = a.as_i32x4();
17972    let r = vprold128(a, IMM8);
17973    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
17974}
17975
17976/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
17977///
17978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
17979#[inline]
17980#[target_feature(enable = "avx512f,avx512vl")]
17981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17982#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17983#[rustc_legacy_const_generics(2)]
17984pub unsafe fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
17985    static_assert_uimm_bits!(IMM8, 8);
17986    let a = a.as_i32x4();
17987    let r = vprold128(a, IMM8);
17988    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
17989}
17990
17991/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
17992///
17993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
17994#[inline]
17995#[target_feature(enable = "avx512f")]
17996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17997#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
17998#[rustc_legacy_const_generics(1)]
17999pub unsafe fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
18000    static_assert_uimm_bits!(IMM8, 8);
18001    let a = a.as_i32x16();
18002    let r = vprord(a, IMM8);
18003    transmute(r)
18004}
18005
18006/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18007///
18008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
18009#[inline]
18010#[target_feature(enable = "avx512f")]
18011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18012#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
18013#[rustc_legacy_const_generics(3)]
18014pub unsafe fn _mm512_mask_ror_epi32<const IMM8: i32>(
18015    src: __m512i,
18016    k: __mmask16,
18017    a: __m512i,
18018) -> __m512i {
18019    static_assert_uimm_bits!(IMM8, 8);
18020    let a = a.as_i32x16();
18021    let r = vprord(a, IMM8);
18022    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
18023}
18024
18025/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18026///
18027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
18028#[inline]
18029#[target_feature(enable = "avx512f")]
18030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18031#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
18032#[rustc_legacy_const_generics(2)]
18033pub unsafe fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
18034    static_assert_uimm_bits!(IMM8, 8);
18035    let a = a.as_i32x16();
18036    let r = vprord(a, IMM8);
18037    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
18038}
18039
18040/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18041///
18042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
18043#[inline]
18044#[target_feature(enable = "avx512f,avx512vl")]
18045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18046#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
18047#[rustc_legacy_const_generics(1)]
18048pub unsafe fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
18049    static_assert_uimm_bits!(IMM8, 8);
18050    let a = a.as_i32x8();
18051    let r = vprord256(a, IMM8);
18052    transmute(r)
18053}
18054
18055/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18056///
18057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
18058#[inline]
18059#[target_feature(enable = "avx512f,avx512vl")]
18060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18061#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
18062#[rustc_legacy_const_generics(3)]
18063pub unsafe fn _mm256_mask_ror_epi32<const IMM8: i32>(
18064    src: __m256i,
18065    k: __mmask8,
18066    a: __m256i,
18067) -> __m256i {
18068    static_assert_uimm_bits!(IMM8, 8);
18069    let a = a.as_i32x8();
18070    let r = vprord256(a, IMM8);
18071    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
18072}
18073
18074/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18075///
18076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
18077#[inline]
18078#[target_feature(enable = "avx512f,avx512vl")]
18079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18080#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
18081#[rustc_legacy_const_generics(2)]
18082pub unsafe fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
18083    static_assert_uimm_bits!(IMM8, 8);
18084    let a = a.as_i32x8();
18085    let r = vprord256(a, IMM8);
18086    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
18087}
18088
18089/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18090///
18091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
18092#[inline]
18093#[target_feature(enable = "avx512f,avx512vl")]
18094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18095#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
18096#[rustc_legacy_const_generics(1)]
18097pub unsafe fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
18098    static_assert_uimm_bits!(IMM8, 8);
18099    let a = a.as_i32x4();
18100    let r = vprord128(a, IMM8);
18101    transmute(r)
18102}
18103
18104/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18105///
18106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
18107#[inline]
18108#[target_feature(enable = "avx512f,avx512vl")]
18109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18110#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
18111#[rustc_legacy_const_generics(3)]
18112pub unsafe fn _mm_mask_ror_epi32<const IMM8: i32>(
18113    src: __m128i,
18114    k: __mmask8,
18115    a: __m128i,
18116) -> __m128i {
18117    static_assert_uimm_bits!(IMM8, 8);
18118    let a = a.as_i32x4();
18119    let r = vprord128(a, IMM8);
18120    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
18121}
18122
18123/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18124///
18125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
18126#[inline]
18127#[target_feature(enable = "avx512f,avx512vl")]
18128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18129#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
18130#[rustc_legacy_const_generics(2)]
18131pub unsafe fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
18132    static_assert_uimm_bits!(IMM8, 8);
18133    let a = a.as_i32x4();
18134    let r = vprord128(a, IMM8);
18135    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
18136}
18137
18138/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18139///
18140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
18141#[inline]
18142#[target_feature(enable = "avx512f")]
18143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18144#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18145#[rustc_legacy_const_generics(1)]
18146pub unsafe fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
18147    static_assert_uimm_bits!(IMM8, 8);
18148    let a = a.as_i64x8();
18149    let r = vprolq(a, IMM8);
18150    transmute(r)
18151}
18152
18153/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18154///
18155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
18156#[inline]
18157#[target_feature(enable = "avx512f")]
18158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18159#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18160#[rustc_legacy_const_generics(3)]
18161pub unsafe fn _mm512_mask_rol_epi64<const IMM8: i32>(
18162    src: __m512i,
18163    k: __mmask8,
18164    a: __m512i,
18165) -> __m512i {
18166    static_assert_uimm_bits!(IMM8, 8);
18167    let a = a.as_i64x8();
18168    let r = vprolq(a, IMM8);
18169    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
18170}
18171
18172/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18173///
18174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
18175#[inline]
18176#[target_feature(enable = "avx512f")]
18177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18178#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18179#[rustc_legacy_const_generics(2)]
18180pub unsafe fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
18181    static_assert_uimm_bits!(IMM8, 8);
18182    let a = a.as_i64x8();
18183    let r = vprolq(a, IMM8);
18184    transmute(simd_select_bitmask(k, r, i64x8::ZERO))
18185}
18186
18187/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18188///
18189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
18190#[inline]
18191#[target_feature(enable = "avx512f,avx512vl")]
18192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18193#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18194#[rustc_legacy_const_generics(1)]
18195pub unsafe fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
18196    static_assert_uimm_bits!(IMM8, 8);
18197    let a = a.as_i64x4();
18198    let r = vprolq256(a, IMM8);
18199    transmute(r)
18200}
18201
18202/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18203///
18204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
18205#[inline]
18206#[target_feature(enable = "avx512f,avx512vl")]
18207#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18208#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18209#[rustc_legacy_const_generics(3)]
18210pub unsafe fn _mm256_mask_rol_epi64<const IMM8: i32>(
18211    src: __m256i,
18212    k: __mmask8,
18213    a: __m256i,
18214) -> __m256i {
18215    static_assert_uimm_bits!(IMM8, 8);
18216    let a = a.as_i64x4();
18217    let r = vprolq256(a, IMM8);
18218    transmute(simd_select_bitmask(k, r, src.as_i64x4()))
18219}
18220
18221/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18222///
18223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
18224#[inline]
18225#[target_feature(enable = "avx512f,avx512vl")]
18226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18227#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18228#[rustc_legacy_const_generics(2)]
18229pub unsafe fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
18230    static_assert_uimm_bits!(IMM8, 8);
18231    let a = a.as_i64x4();
18232    let r = vprolq256(a, IMM8);
18233    transmute(simd_select_bitmask(k, r, i64x4::ZERO))
18234}
18235
18236/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18237///
18238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
18239#[inline]
18240#[target_feature(enable = "avx512f,avx512vl")]
18241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18242#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18243#[rustc_legacy_const_generics(1)]
18244pub unsafe fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
18245    static_assert_uimm_bits!(IMM8, 8);
18246    let a = a.as_i64x2();
18247    let r = vprolq128(a, IMM8);
18248    transmute(r)
18249}
18250
18251/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18252///
18253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
18254#[inline]
18255#[target_feature(enable = "avx512f,avx512vl")]
18256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18257#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18258#[rustc_legacy_const_generics(3)]
18259pub unsafe fn _mm_mask_rol_epi64<const IMM8: i32>(
18260    src: __m128i,
18261    k: __mmask8,
18262    a: __m128i,
18263) -> __m128i {
18264    static_assert_uimm_bits!(IMM8, 8);
18265    let a = a.as_i64x2();
18266    let r = vprolq128(a, IMM8);
18267    transmute(simd_select_bitmask(k, r, src.as_i64x2()))
18268}
18269
18270/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18271///
18272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
18273#[inline]
18274#[target_feature(enable = "avx512f,avx512vl")]
18275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18276#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
18277#[rustc_legacy_const_generics(2)]
18278pub unsafe fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
18279    static_assert_uimm_bits!(IMM8, 8);
18280    let a = a.as_i64x2();
18281    let r = vprolq128(a, IMM8);
18282    transmute(simd_select_bitmask(k, r, i64x2::ZERO))
18283}
18284
18285/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18286///
18287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
18288#[inline]
18289#[target_feature(enable = "avx512f")]
18290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18291#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18292#[rustc_legacy_const_generics(1)]
18293pub unsafe fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
18294    static_assert_uimm_bits!(IMM8, 8);
18295    let a = a.as_i64x8();
18296    let r = vprorq(a, IMM8);
18297    transmute(r)
18298}
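
// Illustrative usage sketch (not part of the upstream source; the helper name
// is hypothetical): the 64-bit right rotation wraps the low bit around into
// bit 63.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn ror_epi64_sketch() {
    let a = _mm512_set1_epi64(0b11);
    // 0b11 rotated right by 1 becomes 0x8000_0000_0000_0001 in every lane.
    let r = _mm512_ror_epi64::<1>(a);
    let expected = _mm512_set1_epi64(0x8000_0000_0000_0001u64 as i64);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, expected), 0xff);
}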
18299
18300/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18301///
18302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
18303#[inline]
18304#[target_feature(enable = "avx512f")]
18305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18306#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18307#[rustc_legacy_const_generics(3)]
18308pub unsafe fn _mm512_mask_ror_epi64<const IMM8: i32>(
18309    src: __m512i,
18310    k: __mmask8,
18311    a: __m512i,
18312) -> __m512i {
18313    static_assert_uimm_bits!(IMM8, 8);
18314    let a = a.as_i64x8();
18315    let r = vprorq(a, IMM8);
18316    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
18317}
18318
18319/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18320///
18321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
18322#[inline]
18323#[target_feature(enable = "avx512f")]
18324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18325#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18326#[rustc_legacy_const_generics(2)]
18327pub unsafe fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
18328    static_assert_uimm_bits!(IMM8, 8);
18329    let a = a.as_i64x8();
18330    let r = vprorq(a, IMM8);
18331    transmute(simd_select_bitmask(k, r, i64x8::ZERO))
18332}
18333
18334/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18335///
18336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
18337#[inline]
18338#[target_feature(enable = "avx512f,avx512vl")]
18339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18340#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18341#[rustc_legacy_const_generics(1)]
18342pub unsafe fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
18343    static_assert_uimm_bits!(IMM8, 8);
18344    let a = a.as_i64x4();
18345    let r = vprorq256(a, IMM8);
18346    transmute(r)
18347}
18348
18349/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18350///
18351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
18352#[inline]
18353#[target_feature(enable = "avx512f,avx512vl")]
18354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18355#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18356#[rustc_legacy_const_generics(3)]
18357pub unsafe fn _mm256_mask_ror_epi64<const IMM8: i32>(
18358    src: __m256i,
18359    k: __mmask8,
18360    a: __m256i,
18361) -> __m256i {
18362    static_assert_uimm_bits!(IMM8, 8);
18363    let a = a.as_i64x4();
18364    let r = vprorq256(a, IMM8);
18365    transmute(simd_select_bitmask(k, r, src.as_i64x4()))
18366}
18367
18368/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18369///
18370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
18371#[inline]
18372#[target_feature(enable = "avx512f,avx512vl")]
18373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18374#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18375#[rustc_legacy_const_generics(2)]
18376pub unsafe fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
18377    static_assert_uimm_bits!(IMM8, 8);
18378    let a = a.as_i64x4();
18379    let r = vprorq256(a, IMM8);
18380    transmute(simd_select_bitmask(k, r, i64x4::ZERO))
18381}
18382
18383/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
18384///
18385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
18386#[inline]
18387#[target_feature(enable = "avx512f,avx512vl")]
18388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18389#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18390#[rustc_legacy_const_generics(1)]
18391pub unsafe fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
18392    static_assert_uimm_bits!(IMM8, 8);
18393    let a = a.as_i64x2();
18394    let r = vprorq128(a, IMM8);
18395    transmute(r)
18396}
18397
18398/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18399///
18400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
18401#[inline]
18402#[target_feature(enable = "avx512f,avx512vl")]
18403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18404#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18405#[rustc_legacy_const_generics(3)]
18406pub unsafe fn _mm_mask_ror_epi64<const IMM8: i32>(
18407    src: __m128i,
18408    k: __mmask8,
18409    a: __m128i,
18410) -> __m128i {
18411    static_assert_uimm_bits!(IMM8, 8);
18412    let a = a.as_i64x2();
18413    let r = vprorq128(a, IMM8);
18414    transmute(simd_select_bitmask(k, r, src.as_i64x2()))
18415}
18416
18417/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18418///
18419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
18420#[inline]
18421#[target_feature(enable = "avx512f,avx512vl")]
18422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18423#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
18424#[rustc_legacy_const_generics(2)]
18425pub unsafe fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
18426    static_assert_uimm_bits!(IMM8, 8);
18427    let a = a.as_i64x2();
18428    let r = vprorq128(a, IMM8);
18429    transmute(simd_select_bitmask(k, r, i64x2::ZERO))
18430}
18431
18432/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
18433///
18434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
18435#[inline]
18436#[target_feature(enable = "avx512f")]
18437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18438#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18439#[rustc_legacy_const_generics(1)]
18440pub unsafe fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
18441    static_assert_uimm_bits!(IMM8, 8);
18442    if IMM8 >= 32 {
18443        _mm512_setzero_si512()
18444    } else {
18445        transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
18446    }
18447}
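
// Illustrative usage sketch (not part of the upstream source; the helper name
// is hypothetical): unlike the rotates above, a left shift discards the bits
// shifted out, and a count of 32 or more yields all zeros rather than being
// taken modulo 32.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn slli_epi32_sketch() {
    let a = _mm512_set1_epi32(3);
    // Shifting left by 4 multiplies each lane by 16.
    let r = _mm512_slli_epi32::<4>(a);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(48)), u16::MAX);
    // A shift count of 32 clears every lane.
    let z = _mm512_slli_epi32::<32>(a);
    assert_eq!(_mm512_cmpeq_epi32_mask(z, _mm512_setzero_si512()), u16::MAX);
}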
18448
18449/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18450///
18451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
18452#[inline]
18453#[target_feature(enable = "avx512f")]
18454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18455#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18456#[rustc_legacy_const_generics(3)]
18457pub unsafe fn _mm512_mask_slli_epi32<const IMM8: u32>(
18458    src: __m512i,
18459    k: __mmask16,
18460    a: __m512i,
18461) -> __m512i {
18462    static_assert_uimm_bits!(IMM8, 8);
18463    let shf = if IMM8 >= 32 {
18464        u32x16::ZERO
18465    } else {
18466        simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
18467    };
18468    transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
18469}
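
// Illustrative usage sketch (not part of the upstream source; the helper name
// and mask are hypothetical): the writemask form shifts only the selected
// lanes and copies the rest from `src`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_slli_epi32_sketch() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(1);
    // Only the low eight lanes are shifted; the upper eight keep -1 from `src`.
    let r = _mm512_mask_slli_epi32::<3>(src, 0x00ff, a);
    let expected = _mm512_setr_epi32(8, 8, 8, 8, 8, 8, 8, 8, -1, -1, -1, -1, -1, -1, -1, -1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), u16::MAX);
}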
18470
18471/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18472///
18473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
18474#[inline]
18475#[target_feature(enable = "avx512f")]
18476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18477#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18478#[rustc_legacy_const_generics(2)]
18479pub unsafe fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
18480    static_assert_uimm_bits!(IMM8, 8);
18481    if IMM8 >= 32 {
18482        _mm512_setzero_si512()
18483    } else {
18484        let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
18485        transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
18486    }
18487}
18488
18489/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18490///
18491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
18492#[inline]
18493#[target_feature(enable = "avx512f,avx512vl")]
18494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18495#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18496#[rustc_legacy_const_generics(3)]
18497pub unsafe fn _mm256_mask_slli_epi32<const IMM8: u32>(
18498    src: __m256i,
18499    k: __mmask8,
18500    a: __m256i,
18501) -> __m256i {
18502    static_assert_uimm_bits!(IMM8, 8);
18503    let r = if IMM8 >= 32 {
18504        u32x8::ZERO
18505    } else {
18506        simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
18507    };
18508    transmute(simd_select_bitmask(k, r, src.as_u32x8()))
18509}
18510
18511/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18512///
18513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
18514#[inline]
18515#[target_feature(enable = "avx512f,avx512vl")]
18516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18517#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18518#[rustc_legacy_const_generics(2)]
18519pub unsafe fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
18520    static_assert_uimm_bits!(IMM8, 8);
18521    if IMM8 >= 32 {
18522        _mm256_setzero_si256()
18523    } else {
18524        let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
18525        transmute(simd_select_bitmask(k, r, u32x8::ZERO))
18526    }
18527}
18528
18529/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18530///
18531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
18532#[inline]
18533#[target_feature(enable = "avx512f,avx512vl")]
18534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18535#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18536#[rustc_legacy_const_generics(3)]
18537pub unsafe fn _mm_mask_slli_epi32<const IMM8: u32>(
18538    src: __m128i,
18539    k: __mmask8,
18540    a: __m128i,
18541) -> __m128i {
18542    static_assert_uimm_bits!(IMM8, 8);
18543    let r = if IMM8 >= 32 {
18544        u32x4::ZERO
18545    } else {
18546        simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
18547    };
18548    transmute(simd_select_bitmask(k, r, src.as_u32x4()))
18549}
18550
18551/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18552///
18553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
18554#[inline]
18555#[target_feature(enable = "avx512f,avx512vl")]
18556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18557#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
18558#[rustc_legacy_const_generics(2)]
18559pub unsafe fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
18560    static_assert_uimm_bits!(IMM8, 8);
18561    if IMM8 >= 32 {
18562        _mm_setzero_si128()
18563    } else {
18564        let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
18565        transmute(simd_select_bitmask(k, r, u32x4::ZERO))
18566    }
18567}
18568
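// Illustrative sketch (not part of the upstream source): the 128-bit and
// 256-bit masked forms require AVX-512VL in addition to AVX-512F, as the
// `target_feature` lists above indicate. The helper name is an assumption
// made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _slli_epi32_vl_sketch() {
    let a = _mm_set1_epi32(3);
    let r: [i32; 4] = mem::transmute(_mm_maskz_slli_epi32::<1>(0b0101, a));
    assert_eq!(r, [6, 0, 6, 0]); // lanes with a clear mask bit are zeroed
}
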
18569/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
18570///
18571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
18572#[inline]
18573#[target_feature(enable = "avx512f")]
18574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18575#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18576#[rustc_legacy_const_generics(1)]
18577pub unsafe fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
18578    static_assert_uimm_bits!(IMM8, 8);
18579    if IMM8 >= 32 {
18580        _mm512_setzero_si512()
18581    } else {
18582        transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
18583    }
18584}
18585
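// Illustrative sketch (not part of the upstream source): `_mm512_srli_epi32`
// is a logical shift, so sign bits are not preserved; shifting -1 right by 28
// leaves only the low four bits set. The helper name is an assumption made
// for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _srli_epi32_zero_fill_sketch() {
    let a = _mm512_set1_epi32(-1);
    let r: [u32; 16] = mem::transmute(_mm512_srli_epi32::<28>(a));
    // 0xFFFF_FFFF >> 28 == 0xF because zeros are shifted in from the left.
    assert!(r.iter().all(|&x| x == 0xF));
}
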
18586/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18587///
18588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
18589#[inline]
18590#[target_feature(enable = "avx512f")]
18591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18592#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18593#[rustc_legacy_const_generics(3)]
18594pub unsafe fn _mm512_mask_srli_epi32<const IMM8: u32>(
18595    src: __m512i,
18596    k: __mmask16,
18597    a: __m512i,
18598) -> __m512i {
18599    static_assert_uimm_bits!(IMM8, 8);
18600    let shf = if IMM8 >= 32 {
18601        u32x16::ZERO
18602    } else {
18603        simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
18604    };
18605    transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
18606}
18607
18608/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18609///
18610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
18611#[inline]
18612#[target_feature(enable = "avx512f")]
18613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18614#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18615#[rustc_legacy_const_generics(2)]
18616pub unsafe fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
18617    static_assert_uimm_bits!(IMM8, 8);
18618    if IMM8 >= 32 {
18619        _mm512_setzero_si512()
18620    } else {
18621        let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
18622        transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
18623    }
18624}
18625
18626/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18627///
18628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
18629#[inline]
18630#[target_feature(enable = "avx512f,avx512vl")]
18631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18632#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18633#[rustc_legacy_const_generics(3)]
18634pub unsafe fn _mm256_mask_srli_epi32<const IMM8: u32>(
18635    src: __m256i,
18636    k: __mmask8,
18637    a: __m256i,
18638) -> __m256i {
18639    static_assert_uimm_bits!(IMM8, 8);
18640    let r = if IMM8 >= 32 {
18641        u32x8::ZERO
18642    } else {
18643        simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
18644    };
18645    transmute(simd_select_bitmask(k, r, src.as_u32x8()))
18646}
18647
18648/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18649///
18650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
18651#[inline]
18652#[target_feature(enable = "avx512f,avx512vl")]
18653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18654#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18655#[rustc_legacy_const_generics(2)]
18656pub unsafe fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
18657    static_assert_uimm_bits!(IMM8, 8);
18658    if IMM8 >= 32 {
18659        _mm256_setzero_si256()
18660    } else {
18661        let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
18662        transmute(simd_select_bitmask(k, r, u32x8::ZERO))
18663    }
18664}
18665
18666/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18667///
18668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
18669#[inline]
18670#[target_feature(enable = "avx512f,avx512vl")]
18671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18672#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18673#[rustc_legacy_const_generics(3)]
18674pub unsafe fn _mm_mask_srli_epi32<const IMM8: u32>(
18675    src: __m128i,
18676    k: __mmask8,
18677    a: __m128i,
18678) -> __m128i {
18679    static_assert_uimm_bits!(IMM8, 8);
18680    let r = if IMM8 >= 32 {
18681        u32x4::ZERO
18682    } else {
18683        simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
18684    };
18685    transmute(simd_select_bitmask(k, r, src.as_u32x4()))
18686}
18687
18688/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18689///
18690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
18691#[inline]
18692#[target_feature(enable = "avx512f,avx512vl")]
18693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18694#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
18695#[rustc_legacy_const_generics(2)]
18696pub unsafe fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
18697    static_assert_uimm_bits!(IMM8, 8);
18698    if IMM8 >= 32 {
18699        _mm_setzero_si128()
18700    } else {
18701        let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
18702        transmute(simd_select_bitmask(k, r, u32x4::ZERO))
18703    }
18704}
18705
18706/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
18707///
18708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
18709#[inline]
18710#[target_feature(enable = "avx512f")]
18711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18712#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18713#[rustc_legacy_const_generics(1)]
18714pub unsafe fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
18715    static_assert_uimm_bits!(IMM8, 8);
18716    if IMM8 >= 64 {
18717        _mm512_setzero_si512()
18718    } else {
18719        transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
18720    }
18721}
18722
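// Illustrative sketch (not part of the upstream source): the 64-bit form
// accepts counts up to 63, so shifting 1 left by 40 is still in range even
// though it would be out of range for a 32-bit lane. The helper name is an
// assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _slli_epi64_wide_count_sketch() {
    let a = _mm512_set1_epi64(1);
    let r: [u64; 8] = mem::transmute(_mm512_slli_epi64::<40>(a));
    assert!(r.iter().all(|&x| x == 1 << 40));
}
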
18723/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18724///
18725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
18726#[inline]
18727#[target_feature(enable = "avx512f")]
18728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18729#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18730#[rustc_legacy_const_generics(3)]
18731pub unsafe fn _mm512_mask_slli_epi64<const IMM8: u32>(
18732    src: __m512i,
18733    k: __mmask8,
18734    a: __m512i,
18735) -> __m512i {
18736    static_assert_uimm_bits!(IMM8, 8);
18737    let shf = if IMM8 >= 64 {
18738        u64x8::ZERO
18739    } else {
18740        simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
18741    };
18742    transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
18743}
18744
18745/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18746///
18747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
18748#[inline]
18749#[target_feature(enable = "avx512f")]
18750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18751#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18752#[rustc_legacy_const_generics(2)]
18753pub unsafe fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
18754    static_assert_uimm_bits!(IMM8, 8);
18755    if IMM8 >= 64 {
18756        _mm512_setzero_si512()
18757    } else {
18758        let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
18759        transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
18760    }
18761}
18762
18763/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18764///
18765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
18766#[inline]
18767#[target_feature(enable = "avx512f,avx512vl")]
18768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18769#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18770#[rustc_legacy_const_generics(3)]
18771pub unsafe fn _mm256_mask_slli_epi64<const IMM8: u32>(
18772    src: __m256i,
18773    k: __mmask8,
18774    a: __m256i,
18775) -> __m256i {
18776    static_assert_uimm_bits!(IMM8, 8);
18777    let r = if IMM8 >= 64 {
18778        u64x4::ZERO
18779    } else {
18780        simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
18781    };
18782    transmute(simd_select_bitmask(k, r, src.as_u64x4()))
18783}
18784
18785/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18786///
18787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
18788#[inline]
18789#[target_feature(enable = "avx512f,avx512vl")]
18790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18791#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18792#[rustc_legacy_const_generics(2)]
18793pub unsafe fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
18794    static_assert_uimm_bits!(IMM8, 8);
18795    if IMM8 >= 64 {
18796        _mm256_setzero_si256()
18797    } else {
18798        let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
18799        transmute(simd_select_bitmask(k, r, u64x4::ZERO))
18800    }
18801}
18802
18803/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18804///
18805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
18806#[inline]
18807#[target_feature(enable = "avx512f,avx512vl")]
18808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18809#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18810#[rustc_legacy_const_generics(3)]
18811pub unsafe fn _mm_mask_slli_epi64<const IMM8: u32>(
18812    src: __m128i,
18813    k: __mmask8,
18814    a: __m128i,
18815) -> __m128i {
18816    static_assert_uimm_bits!(IMM8, 8);
18817    let r = if IMM8 >= 64 {
18818        u64x2::ZERO
18819    } else {
18820        simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
18821    };
18822    transmute(simd_select_bitmask(k, r, src.as_u64x2()))
18823}
18824
18825/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18826///
18827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
18828#[inline]
18829#[target_feature(enable = "avx512f,avx512vl")]
18830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18831#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
18832#[rustc_legacy_const_generics(2)]
18833pub unsafe fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
18834    static_assert_uimm_bits!(IMM8, 8);
18835    if IMM8 >= 64 {
18836        _mm_setzero_si128()
18837    } else {
18838        let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
18839        transmute(simd_select_bitmask(k, r, u64x2::ZERO))
18840    }
18841}
18842
18843/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
18844///
18845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
18846#[inline]
18847#[target_feature(enable = "avx512f")]
18848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18849#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18850#[rustc_legacy_const_generics(1)]
18851pub unsafe fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
18852    static_assert_uimm_bits!(IMM8, 8);
18853    if IMM8 >= 64 {
18854        _mm512_setzero_si512()
18855    } else {
18856        transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
18857    }
18858}
18859
18860/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18861///
18862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
18863#[inline]
18864#[target_feature(enable = "avx512f")]
18865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18866#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18867#[rustc_legacy_const_generics(3)]
18868pub unsafe fn _mm512_mask_srli_epi64<const IMM8: u32>(
18869    src: __m512i,
18870    k: __mmask8,
18871    a: __m512i,
18872) -> __m512i {
18873    static_assert_uimm_bits!(IMM8, 8);
18874    let shf = if IMM8 >= 64 {
18875        u64x8::ZERO
18876    } else {
18877        simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
18878    };
18879    transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
18880}
18881
18882/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18883///
18884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
18885#[inline]
18886#[target_feature(enable = "avx512f")]
18887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18888#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18889#[rustc_legacy_const_generics(2)]
18890pub unsafe fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
18891    static_assert_uimm_bits!(IMM8, 8);
18892    if IMM8 >= 64 {
18893        _mm512_setzero_si512()
18894    } else {
18895        let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
18896        transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
18897    }
18898}
18899
18900/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18901///
18902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
18903#[inline]
18904#[target_feature(enable = "avx512f,avx512vl")]
18905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18906#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18907#[rustc_legacy_const_generics(3)]
18908pub unsafe fn _mm256_mask_srli_epi64<const IMM8: u32>(
18909    src: __m256i,
18910    k: __mmask8,
18911    a: __m256i,
18912) -> __m256i {
18913    static_assert_uimm_bits!(IMM8, 8);
18914    let r = if IMM8 >= 64 {
18915        u64x4::ZERO
18916    } else {
18917        simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
18918    };
18919    transmute(simd_select_bitmask(k, r, src.as_u64x4()))
18920}
18921
18922/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18923///
18924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
18925#[inline]
18926#[target_feature(enable = "avx512f,avx512vl")]
18927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18928#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18929#[rustc_legacy_const_generics(2)]
18930pub unsafe fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
18931    static_assert_uimm_bits!(IMM8, 8);
18932    if IMM8 >= 64 {
18933        _mm256_setzero_si256()
18934    } else {
18935        let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
18936        transmute(simd_select_bitmask(k, r, u64x4::ZERO))
18937    }
18938}
18939
18940/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18941///
18942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
18943#[inline]
18944#[target_feature(enable = "avx512f,avx512vl")]
18945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18946#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18947#[rustc_legacy_const_generics(3)]
18948pub unsafe fn _mm_mask_srli_epi64<const IMM8: u32>(
18949    src: __m128i,
18950    k: __mmask8,
18951    a: __m128i,
18952) -> __m128i {
18953    static_assert_uimm_bits!(IMM8, 8);
18954    let r = if IMM8 >= 64 {
18955        u64x2::ZERO
18956    } else {
18957        simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
18958    };
18959    transmute(simd_select_bitmask(k, r, src.as_u64x2()))
18960}
18961
18962/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18963///
18964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
18965#[inline]
18966#[target_feature(enable = "avx512f,avx512vl")]
18967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18968#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
18969#[rustc_legacy_const_generics(2)]
18970pub unsafe fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
18971    static_assert_uimm_bits!(IMM8, 8);
18972    if IMM8 >= 64 {
18973        _mm_setzero_si128()
18974    } else {
18975        let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
18976        transmute(simd_select_bitmask(k, r, u64x2::ZERO))
18977    }
18978}
18979
18980/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
18981///
18982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
18983#[inline]
18984#[target_feature(enable = "avx512f")]
18985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18986#[cfg_attr(test, assert_instr(vpslld))]
18987pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
18988    transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
18989}
18990
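// Illustrative sketch (not part of the upstream source): unlike the immediate
// forms above, `_mm512_sll_epi32` reads the shift count from the low 64 bits
// of a 128-bit vector, and any count of 32 or more clears every lane. The
// helper name is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sll_epi32_count_vector_sketch() {
    let a = _mm512_set1_epi32(1);
    let shifted: [i32; 16] = mem::transmute(_mm512_sll_epi32(a, _mm_set_epi64x(0, 4)));
    assert!(shifted.iter().all(|&x| x == 16));
    let cleared: [i32; 16] = mem::transmute(_mm512_sll_epi32(a, _mm_set_epi64x(0, 32)));
    assert!(cleared.iter().all(|&x| x == 0));
}
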
18991/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18992///
18993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
18994#[inline]
18995#[target_feature(enable = "avx512f")]
18996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18997#[cfg_attr(test, assert_instr(vpslld))]
18998pub unsafe fn _mm512_mask_sll_epi32(
18999    src: __m512i,
19000    k: __mmask16,
19001    a: __m512i,
19002    count: __m128i,
19003) -> __m512i {
19004    let shf = _mm512_sll_epi32(a, count).as_i32x16();
19005    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
19006}
19007
19008/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19009///
19010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
19011#[inline]
19012#[target_feature(enable = "avx512f")]
19013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19014#[cfg_attr(test, assert_instr(vpslld))]
19015pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
19016    let shf = _mm512_sll_epi32(a, count).as_i32x16();
19017    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
19018}
19019
19020/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19021///
19022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
19023#[inline]
19024#[target_feature(enable = "avx512f,avx512vl")]
19025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19026#[cfg_attr(test, assert_instr(vpslld))]
19027pub unsafe fn _mm256_mask_sll_epi32(
19028    src: __m256i,
19029    k: __mmask8,
19030    a: __m256i,
19031    count: __m128i,
19032) -> __m256i {
19033    let shf = _mm256_sll_epi32(a, count).as_i32x8();
19034    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
19035}
19036
19037/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19038///
19039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
19040#[inline]
19041#[target_feature(enable = "avx512f,avx512vl")]
19042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19043#[cfg_attr(test, assert_instr(vpslld))]
19044pub unsafe fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19045    let shf = _mm256_sll_epi32(a, count).as_i32x8();
19046    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
19047}
19048
19049/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19050///
19051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
19052#[inline]
19053#[target_feature(enable = "avx512f,avx512vl")]
19054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19055#[cfg_attr(test, assert_instr(vpslld))]
19056pub unsafe fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19057    let shf = _mm_sll_epi32(a, count).as_i32x4();
19058    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
19059}
19060
19061/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19062///
19063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
19064#[inline]
19065#[target_feature(enable = "avx512f,avx512vl")]
19066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19067#[cfg_attr(test, assert_instr(vpslld))]
19068pub unsafe fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19069    let shf = _mm_sll_epi32(a, count).as_i32x4();
19070    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
19071}
19072
19073/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
19074///
19075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
19076#[inline]
19077#[target_feature(enable = "avx512f")]
19078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19079#[cfg_attr(test, assert_instr(vpsrld))]
19080pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
19081    transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
19082}
19083
19084/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19085///
19086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
19087#[inline]
19088#[target_feature(enable = "avx512f")]
19089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19090#[cfg_attr(test, assert_instr(vpsrld))]
19091pub unsafe fn _mm512_mask_srl_epi32(
19092    src: __m512i,
19093    k: __mmask16,
19094    a: __m512i,
19095    count: __m128i,
19096) -> __m512i {
19097    let shf = _mm512_srl_epi32(a, count).as_i32x16();
19098    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
19099}
19100
19101/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19102///
19103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
19104#[inline]
19105#[target_feature(enable = "avx512f")]
19106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19107#[cfg_attr(test, assert_instr(vpsrld))]
19108pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
19109    let shf = _mm512_srl_epi32(a, count).as_i32x16();
19110    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
19111}
19112
19113/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19114///
19115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
19116#[inline]
19117#[target_feature(enable = "avx512f,avx512vl")]
19118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19119#[cfg_attr(test, assert_instr(vpsrld))]
19120pub unsafe fn _mm256_mask_srl_epi32(
19121    src: __m256i,
19122    k: __mmask8,
19123    a: __m256i,
19124    count: __m128i,
19125) -> __m256i {
19126    let shf = _mm256_srl_epi32(a, count).as_i32x8();
19127    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
19128}
19129
19130/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19131///
19132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
19133#[inline]
19134#[target_feature(enable = "avx512f,avx512vl")]
19135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19136#[cfg_attr(test, assert_instr(vpsrld))]
19137pub unsafe fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19138    let shf = _mm256_srl_epi32(a, count).as_i32x8();
19139    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
19140}
19141
19142/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19143///
19144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
19145#[inline]
19146#[target_feature(enable = "avx512f,avx512vl")]
19147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19148#[cfg_attr(test, assert_instr(vpsrld))]
19149pub unsafe fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19150    let shf = _mm_srl_epi32(a, count).as_i32x4();
19151    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
19152}
19153
19154/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19155///
19156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
19157#[inline]
19158#[target_feature(enable = "avx512f,avx512vl")]
19159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19160#[cfg_attr(test, assert_instr(vpsrld))]
19161pub unsafe fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19162    let shf = _mm_srl_epi32(a, count).as_i32x4();
19163    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
19164}
19165
19166/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
19167///
19168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
19169#[inline]
19170#[target_feature(enable = "avx512f")]
19171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19172#[cfg_attr(test, assert_instr(vpsllq))]
19173pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
19174    transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
19175}
19176
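// Illustrative sketch (not part of the upstream source): the 64-bit
// count-vector form behaves like `_mm512_sll_epi32` above, except lanes are
// only cleared once the count reaches 64. The helper name is an assumption
// made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sll_epi64_count_vector_sketch() {
    let a = _mm512_set1_epi64(1);
    let shifted: [u64; 8] = mem::transmute(_mm512_sll_epi64(a, _mm_set_epi64x(0, 40)));
    assert!(shifted.iter().all(|&x| x == 1 << 40));
}
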
19177/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19178///
19179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
19180#[inline]
19181#[target_feature(enable = "avx512f")]
19182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19183#[cfg_attr(test, assert_instr(vpsllq))]
19184pub unsafe fn _mm512_mask_sll_epi64(
19185    src: __m512i,
19186    k: __mmask8,
19187    a: __m512i,
19188    count: __m128i,
19189) -> __m512i {
19190    let shf = _mm512_sll_epi64(a, count).as_i64x8();
19191    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19192}
19193
19194/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19195///
19196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
19197#[inline]
19198#[target_feature(enable = "avx512f")]
19199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19200#[cfg_attr(test, assert_instr(vpsllq))]
19201pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
19202    let shf = _mm512_sll_epi64(a, count).as_i64x8();
19203    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
19204}
19205
19206/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19207///
19208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
19209#[inline]
19210#[target_feature(enable = "avx512f,avx512vl")]
19211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19212#[cfg_attr(test, assert_instr(vpsllq))]
19213pub unsafe fn _mm256_mask_sll_epi64(
19214    src: __m256i,
19215    k: __mmask8,
19216    a: __m256i,
19217    count: __m128i,
19218) -> __m256i {
19219    let shf = _mm256_sll_epi64(a, count).as_i64x4();
19220    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19221}
19222
19223/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19224///
19225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
19226#[inline]
19227#[target_feature(enable = "avx512f,avx512vl")]
19228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19229#[cfg_attr(test, assert_instr(vpsllq))]
19230pub unsafe fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19231    let shf = _mm256_sll_epi64(a, count).as_i64x4();
19232    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
19233}
19234
19235/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19236///
19237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
19238#[inline]
19239#[target_feature(enable = "avx512f,avx512vl")]
19240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19241#[cfg_attr(test, assert_instr(vpsllq))]
19242pub unsafe fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19243    let shf = _mm_sll_epi64(a, count).as_i64x2();
19244    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19245}
19246
19247/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19248///
19249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
19250#[inline]
19251#[target_feature(enable = "avx512f,avx512vl")]
19252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19253#[cfg_attr(test, assert_instr(vpsllq))]
19254pub unsafe fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19255    let shf = _mm_sll_epi64(a, count).as_i64x2();
19256    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
19257}
19258
19259/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
19260///
19261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
19262#[inline]
19263#[target_feature(enable = "avx512f")]
19264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19265#[cfg_attr(test, assert_instr(vpsrlq))]
19266pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
19267    transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
19268}
19269
19270/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19271///
19272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
19273#[inline]
19274#[target_feature(enable = "avx512f")]
19275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19276#[cfg_attr(test, assert_instr(vpsrlq))]
19277pub unsafe fn _mm512_mask_srl_epi64(
19278    src: __m512i,
19279    k: __mmask8,
19280    a: __m512i,
19281    count: __m128i,
19282) -> __m512i {
19283    let shf = _mm512_srl_epi64(a, count).as_i64x8();
19284    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19285}
19286
19287/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19288///
19289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
19290#[inline]
19291#[target_feature(enable = "avx512f")]
19292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19293#[cfg_attr(test, assert_instr(vpsrlq))]
19294pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
19295    let shf = _mm512_srl_epi64(a, count).as_i64x8();
19296    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
19297}
19298
19299/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19300///
19301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
19302#[inline]
19303#[target_feature(enable = "avx512f,avx512vl")]
19304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19305#[cfg_attr(test, assert_instr(vpsrlq))]
19306pub unsafe fn _mm256_mask_srl_epi64(
19307    src: __m256i,
19308    k: __mmask8,
19309    a: __m256i,
19310    count: __m128i,
19311) -> __m256i {
19312    let shf = _mm256_srl_epi64(a, count).as_i64x4();
19313    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19314}
19315
19316/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19317///
19318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
19319#[inline]
19320#[target_feature(enable = "avx512f,avx512vl")]
19321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19322#[cfg_attr(test, assert_instr(vpsrlq))]
19323pub unsafe fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19324    let shf = _mm256_srl_epi64(a, count).as_i64x4();
19325    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
19326}
19327
19328/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19329///
19330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
19331#[inline]
19332#[target_feature(enable = "avx512f,avx512vl")]
19333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19334#[cfg_attr(test, assert_instr(vpsrlq))]
19335pub unsafe fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19336    let shf = _mm_srl_epi64(a, count).as_i64x2();
19337    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19338}
19339
19340/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19341///
19342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
19343#[inline]
19344#[target_feature(enable = "avx512f,avx512vl")]
19345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19346#[cfg_attr(test, assert_instr(vpsrlq))]
19347pub unsafe fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19348    let shf = _mm_srl_epi64(a, count).as_i64x2();
19349    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
19350}
19351
19352/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19353///
19354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
19355#[inline]
19356#[target_feature(enable = "avx512f")]
19357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19358#[cfg_attr(test, assert_instr(vpsrad))]
19359pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
19360    transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
19361}
19362
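// Illustrative sketch (not part of the upstream source): `_mm512_sra_epi32`
// is an arithmetic shift, so the sign bit is replicated; -16 shifted right by
// two stays negative, in contrast to the logical `_mm512_srl_epi32` above.
// The helper name is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sra_epi32_sign_fill_sketch() {
    let a = _mm512_set1_epi32(-16);
    let r: [i32; 16] = mem::transmute(_mm512_sra_epi32(a, _mm_set_epi64x(0, 2)));
    assert!(r.iter().all(|&x| x == -4));
}
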
19363/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19364///
19365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
19366#[inline]
19367#[target_feature(enable = "avx512f")]
19368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19369#[cfg_attr(test, assert_instr(vpsrad))]
19370pub unsafe fn _mm512_mask_sra_epi32(
19371    src: __m512i,
19372    k: __mmask16,
19373    a: __m512i,
19374    count: __m128i,
19375) -> __m512i {
19376    let shf = _mm512_sra_epi32(a, count).as_i32x16();
19377    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
19378}
19379
19380/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19381///
19382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
19383#[inline]
19384#[target_feature(enable = "avx512f")]
19385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19386#[cfg_attr(test, assert_instr(vpsrad))]
19387pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
19388    let shf = _mm512_sra_epi32(a, count).as_i32x16();
19389    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
19390}
19391
19392/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19393///
19394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
19395#[inline]
19396#[target_feature(enable = "avx512f,avx512vl")]
19397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19398#[cfg_attr(test, assert_instr(vpsrad))]
19399pub unsafe fn _mm256_mask_sra_epi32(
19400    src: __m256i,
19401    k: __mmask8,
19402    a: __m256i,
19403    count: __m128i,
19404) -> __m256i {
19405    let shf = _mm256_sra_epi32(a, count).as_i32x8();
19406    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
19407}
19408
19409/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19410///
19411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
19412#[inline]
19413#[target_feature(enable = "avx512f,avx512vl")]
19414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19415#[cfg_attr(test, assert_instr(vpsrad))]
19416pub unsafe fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19417    let shf = _mm256_sra_epi32(a, count).as_i32x8();
19418    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
19419}
19420
19421/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19422///
19423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
19424#[inline]
19425#[target_feature(enable = "avx512f,avx512vl")]
19426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19427#[cfg_attr(test, assert_instr(vpsrad))]
19428pub unsafe fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19429    let shf = _mm_sra_epi32(a, count).as_i32x4();
19430    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
19431}
19432
19433/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19434///
19435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
19436#[inline]
19437#[target_feature(enable = "avx512f,avx512vl")]
19438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19439#[cfg_attr(test, assert_instr(vpsrad))]
19440pub unsafe fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19441    let shf = _mm_sra_epi32(a, count).as_i32x4();
19442    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
19443}
19444
19445/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19446///
19447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
19448#[inline]
19449#[target_feature(enable = "avx512f")]
19450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19451#[cfg_attr(test, assert_instr(vpsraq))]
19452pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
19453    transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
19454}
19455
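// Illustrative sketch (not part of the upstream source): 64-bit arithmetic
// right shifts have no SSE2/AVX2 counterpart; AVX-512 adds them as `vpsraq`.
// The sketch checks the sign-extending behaviour. The helper name is an
// assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sra_epi64_sign_fill_sketch() {
    let a = _mm512_set1_epi64(-8);
    let r: [i64; 8] = mem::transmute(_mm512_sra_epi64(a, _mm_set_epi64x(0, 1)));
    assert!(r.iter().all(|&x| x == -4));
}
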
19456/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19457///
19458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
19459#[inline]
19460#[target_feature(enable = "avx512f")]
19461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19462#[cfg_attr(test, assert_instr(vpsraq))]
19463pub unsafe fn _mm512_mask_sra_epi64(
19464    src: __m512i,
19465    k: __mmask8,
19466    a: __m512i,
19467    count: __m128i,
19468) -> __m512i {
19469    let shf = _mm512_sra_epi64(a, count).as_i64x8();
19470    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19471}
19472
19473/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19474///
19475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
19476#[inline]
19477#[target_feature(enable = "avx512f")]
19478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19479#[cfg_attr(test, assert_instr(vpsraq))]
19480pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
19481    let shf = _mm512_sra_epi64(a, count).as_i64x8();
19482    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
19483}
19484
19485/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19486///
19487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
19488#[inline]
19489#[target_feature(enable = "avx512f,avx512vl")]
19490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19491#[cfg_attr(test, assert_instr(vpsraq))]
19492pub unsafe fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
19493    transmute(vpsraq256(a.as_i64x4(), count.as_i64x2()))
19494}
19495
19496/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19497///
19498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
19499#[inline]
19500#[target_feature(enable = "avx512f,avx512vl")]
19501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19502#[cfg_attr(test, assert_instr(vpsraq))]
19503pub unsafe fn _mm256_mask_sra_epi64(
19504    src: __m256i,
19505    k: __mmask8,
19506    a: __m256i,
19507    count: __m128i,
19508) -> __m256i {
19509    let shf = _mm256_sra_epi64(a, count).as_i64x4();
19510    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19511}
19512
19513/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19514///
19515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
19516#[inline]
19517#[target_feature(enable = "avx512f,avx512vl")]
19518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19519#[cfg_attr(test, assert_instr(vpsraq))]
19520pub unsafe fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
19521    let shf = _mm256_sra_epi64(a, count).as_i64x4();
19522    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
19523}
19524
19525/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
19526///
19527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
19528#[inline]
19529#[target_feature(enable = "avx512f,avx512vl")]
19530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19531#[cfg_attr(test, assert_instr(vpsraq))]
19532pub unsafe fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
19533    transmute(vpsraq128(a.as_i64x2(), count.as_i64x2()))
19534}
19535
19536/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19537///
19538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
19539#[inline]
19540#[target_feature(enable = "avx512f,avx512vl")]
19541#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19542#[cfg_attr(test, assert_instr(vpsraq))]
19543pub unsafe fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19544    let shf = _mm_sra_epi64(a, count).as_i64x2();
19545    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19546}
19547
19548/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19549///
19550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
19551#[inline]
19552#[target_feature(enable = "avx512f,avx512vl")]
19553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19554#[cfg_attr(test, assert_instr(vpsraq))]
19555pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19556    let shf = _mm_sra_epi64(a, count).as_i64x2();
19557    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
19558}
19559
19560/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19561///
19562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
19563#[inline]
19564#[target_feature(enable = "avx512f")]
19565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19566#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19567#[rustc_legacy_const_generics(1)]
19568pub unsafe fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19569    static_assert_uimm_bits!(IMM8, 8);
19570    transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
19571}
19572
19573/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19574///
19575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
19576#[inline]
19577#[target_feature(enable = "avx512f")]
19578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19579#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19580#[rustc_legacy_const_generics(3)]
19581pub unsafe fn _mm512_mask_srai_epi32<const IMM8: u32>(
19582    src: __m512i,
19583    k: __mmask16,
19584    a: __m512i,
19585) -> __m512i {
19586    static_assert_uimm_bits!(IMM8, 8);
19587    let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
19588    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19589}
19590
19591/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19592///
19593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
19594#[inline]
19595#[target_feature(enable = "avx512f")]
19596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19597#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19598#[rustc_legacy_const_generics(2)]
19599pub unsafe fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19600    static_assert_uimm_bits!(IMM8, 8);
19601    let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
19602    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19603}
19604
19605/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19606///
19607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
19608#[inline]
19609#[target_feature(enable = "avx512f,avx512vl")]
19610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19611#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19612#[rustc_legacy_const_generics(3)]
19613pub unsafe fn _mm256_mask_srai_epi32<const IMM8: u32>(
19614    src: __m256i,
19615    k: __mmask8,
19616    a: __m256i,
19617) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
19618    let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
19619    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19620}
19621
19622/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19623///
19624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
19625#[inline]
19626#[target_feature(enable = "avx512f,avx512vl")]
19627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19628#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19629#[rustc_legacy_const_generics(2)]
19630pub unsafe fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
19631    let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
19632    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19633}
19634
19635/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19636///
19637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
19638#[inline]
19639#[target_feature(enable = "avx512f,avx512vl")]
19640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19641#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19642#[rustc_legacy_const_generics(3)]
19643pub unsafe fn _mm_mask_srai_epi32<const IMM8: u32>(
19644    src: __m128i,
19645    k: __mmask8,
19646    a: __m128i,
19647) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
19648    let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
19649    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19650}
19651
19652/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19653///
19654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
19655#[inline]
19656#[target_feature(enable = "avx512f,avx512vl")]
19657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19658#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
19659#[rustc_legacy_const_generics(2)]
19660pub unsafe fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
19661    let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
19662    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19663}
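
// Illustrative sketch (comment only): the `srai` variants take the shift
// amount as a const generic (`IMM8`), and this implementation clamps it to 31
// via `IMM8.min(31)`. Assuming `avx512f` is available and an appropriate
// `unsafe` context, a write-masked call might look like:
//
//     let src = _mm512_setzero_si512();
//     let a = _mm512_set1_epi32(-16);
//     // Selected lanes become -16 >> 2 = -4; the rest keep 0 from `src`.
//     let r = _mm512_mask_srai_epi32::<2>(src, 0xAAAA, a);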
19664
19665/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19666///
19667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
19668#[inline]
19669#[target_feature(enable = "avx512f")]
19670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19671#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19672#[rustc_legacy_const_generics(1)]
19673pub unsafe fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19674    static_assert_uimm_bits!(IMM8, 8);
19675    transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
19676}
19677
19678/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19679///
19680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
19681#[inline]
19682#[target_feature(enable = "avx512f")]
19683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19684#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19685#[rustc_legacy_const_generics(3)]
19686pub unsafe fn _mm512_mask_srai_epi64<const IMM8: u32>(
19687    src: __m512i,
19688    k: __mmask8,
19689    a: __m512i,
19690) -> __m512i {
19691    static_assert_uimm_bits!(IMM8, 8);
19692    let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
19693    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19694}
19695
19696/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19697///
19698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
19699#[inline]
19700#[target_feature(enable = "avx512f")]
19701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19702#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19703#[rustc_legacy_const_generics(2)]
19704pub unsafe fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
19705    static_assert_uimm_bits!(IMM8, 8);
19706    let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
19707    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
19708}
19709
19710/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19711///
19712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
19713#[inline]
19714#[target_feature(enable = "avx512f,avx512vl")]
19715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19716#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19717#[rustc_legacy_const_generics(1)]
19718pub unsafe fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
19719    static_assert_uimm_bits!(IMM8, 8);
19720    transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
19721}
19722
19723/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19724///
19725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
19726#[inline]
19727#[target_feature(enable = "avx512f,avx512vl")]
19728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19729#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19730#[rustc_legacy_const_generics(3)]
19731pub unsafe fn _mm256_mask_srai_epi64<const IMM8: u32>(
19732    src: __m256i,
19733    k: __mmask8,
19734    a: __m256i,
19735) -> __m256i {
19736    static_assert_uimm_bits!(IMM8, 8);
19737    let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
19738    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19739}
19740
19741/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19742///
19743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
19744#[inline]
19745#[target_feature(enable = "avx512f,avx512vl")]
19746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19747#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19748#[rustc_legacy_const_generics(2)]
19749pub unsafe fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19750    static_assert_uimm_bits!(IMM8, 8);
19751    let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
19752    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
19753}
19754
19755/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
19756///
19757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
19758#[inline]
19759#[target_feature(enable = "avx512f,avx512vl")]
19760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19761#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19762#[rustc_legacy_const_generics(1)]
19763pub unsafe fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
19764    static_assert_uimm_bits!(IMM8, 8);
19765    transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
19766}
19767
19768/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19769///
19770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
19771#[inline]
19772#[target_feature(enable = "avx512f,avx512vl")]
19773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19774#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19775#[rustc_legacy_const_generics(3)]
19776pub unsafe fn _mm_mask_srai_epi64<const IMM8: u32>(
19777    src: __m128i,
19778    k: __mmask8,
19779    a: __m128i,
19780) -> __m128i {
19781    static_assert_uimm_bits!(IMM8, 8);
19782    let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
19783    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
19784}
19785
19786/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19787///
19788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
19789#[inline]
19790#[target_feature(enable = "avx512f,avx512vl")]
19791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19792#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
19793#[rustc_legacy_const_generics(2)]
19794pub unsafe fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19795    static_assert_uimm_bits!(IMM8, 8);
19796    let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
19797    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
19798}
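
// Illustrative sketch (comment only): the 64-bit `srai` variants clamp the
// const shift to 63 (`IMM8.min(63)`), so over-large immediates simply
// replicate the sign bit. Assuming `avx512f`/`avx512vl` support, for example:
//
//     let a = _mm256_set1_epi64x(-256);
//     let r = _mm256_srai_epi64::<4>(a); // each lane == -16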
19799
19800/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19801///
19802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
19803#[inline]
19804#[target_feature(enable = "avx512f")]
19805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19806#[cfg_attr(test, assert_instr(vpsravd))]
19807pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
19808    transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
19809}
19810
19811/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19812///
19813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
19814#[inline]
19815#[target_feature(enable = "avx512f")]
19816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19817#[cfg_attr(test, assert_instr(vpsravd))]
19818pub unsafe fn _mm512_mask_srav_epi32(
19819    src: __m512i,
19820    k: __mmask16,
19821    a: __m512i,
19822    count: __m512i,
19823) -> __m512i {
19824    let shf = _mm512_srav_epi32(a, count).as_i32x16();
19825    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
19826}
19827
19828/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19829///
19830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
19831#[inline]
19832#[target_feature(enable = "avx512f")]
19833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19834#[cfg_attr(test, assert_instr(vpsravd))]
19835pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
19836    let shf = _mm512_srav_epi32(a, count).as_i32x16();
19837    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
19838}
19839
19840/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19841///
19842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
19843#[inline]
19844#[target_feature(enable = "avx512f,avx512vl")]
19845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19846#[cfg_attr(test, assert_instr(vpsravd))]
19847pub unsafe fn _mm256_mask_srav_epi32(
19848    src: __m256i,
19849    k: __mmask8,
19850    a: __m256i,
19851    count: __m256i,
19852) -> __m256i {
19853    let shf = _mm256_srav_epi32(a, count).as_i32x8();
19854    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
19855}
19856
19857/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19858///
19859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
19860#[inline]
19861#[target_feature(enable = "avx512f,avx512vl")]
19862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19863#[cfg_attr(test, assert_instr(vpsravd))]
19864pub unsafe fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
19865    let shf = _mm256_srav_epi32(a, count).as_i32x8();
19866    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
19867}
19868
19869/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19870///
19871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
19872#[inline]
19873#[target_feature(enable = "avx512f,avx512vl")]
19874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19875#[cfg_attr(test, assert_instr(vpsravd))]
19876pub unsafe fn _mm_mask_srav_epi32(
19877    src: __m128i,
19878    k: __mmask8,
19879    a: __m128i,
19880    count: __m128i,
19881) -> __m128i {
19882    let shf = _mm_srav_epi32(a, count).as_i32x4();
19883    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
19884}
19885
19886/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19887///
19888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
19889#[inline]
19890#[target_feature(enable = "avx512f,avx512vl")]
19891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19892#[cfg_attr(test, assert_instr(vpsravd))]
19893pub unsafe fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
19894    let shf = _mm_srav_epi32(a, count).as_i32x4();
19895    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
19896}
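
// Illustrative sketch (comment only): unlike `sra`/`srai`, the `srav` variants
// take a per-lane shift count. Using the unmasked AVX2 128-bit form as an
// example:
//
//     let a = _mm_set1_epi32(-64);
//     let counts = _mm_set_epi32(0, 1, 2, 3);
//     // Each lane uses its own count: [-64, -32, -16, -8] from high to low.
//     let r = _mm_srav_epi32(a, counts);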
19897
19898/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19899///
19900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
19901#[inline]
19902#[target_feature(enable = "avx512f")]
19903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19904#[cfg_attr(test, assert_instr(vpsravq))]
19905pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
19906    transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
19907}
19908
19909/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19910///
19911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
19912#[inline]
19913#[target_feature(enable = "avx512f")]
19914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19915#[cfg_attr(test, assert_instr(vpsravq))]
19916pub unsafe fn _mm512_mask_srav_epi64(
19917    src: __m512i,
19918    k: __mmask8,
19919    a: __m512i,
19920    count: __m512i,
19921) -> __m512i {
19922    let shf = _mm512_srav_epi64(a, count).as_i64x8();
19923    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
19924}
19925
19926/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19927///
19928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
19929#[inline]
19930#[target_feature(enable = "avx512f")]
19931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19932#[cfg_attr(test, assert_instr(vpsravq))]
19933pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
19934    let shf = _mm512_srav_epi64(a, count).as_i64x8();
19935    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
19936}
19937
19938/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19939///
19940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
19941#[inline]
19942#[target_feature(enable = "avx512f,avx512vl")]
19943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19944#[cfg_attr(test, assert_instr(vpsravq))]
19945pub unsafe fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
19946    transmute(vpsravq256(a.as_i64x4(), count.as_i64x4()))
19947}
19948
19949/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19950///
19951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
19952#[inline]
19953#[target_feature(enable = "avx512f,avx512vl")]
19954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19955#[cfg_attr(test, assert_instr(vpsravq))]
19956pub unsafe fn _mm256_mask_srav_epi64(
19957    src: __m256i,
19958    k: __mmask8,
19959    a: __m256i,
19960    count: __m256i,
19961) -> __m256i {
19962    let shf = _mm256_srav_epi64(a, count).as_i64x4();
19963    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
19964}
19965
19966/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19967///
19968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
19969#[inline]
19970#[target_feature(enable = "avx512f,avx512vl")]
19971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19972#[cfg_attr(test, assert_instr(vpsravq))]
19973pub unsafe fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
19974    let shf = _mm256_srav_epi64(a, count).as_i64x4();
19975    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
19976}
19977
19978/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
19979///
19980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
19981#[inline]
19982#[target_feature(enable = "avx512f,avx512vl")]
19983#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19984#[cfg_attr(test, assert_instr(vpsravq))]
19985pub unsafe fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
19986    transmute(vpsravq128(a.as_i64x2(), count.as_i64x2()))
19987}
19988
19989/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19990///
19991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
19992#[inline]
19993#[target_feature(enable = "avx512f,avx512vl")]
19994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19995#[cfg_attr(test, assert_instr(vpsravq))]
19996pub unsafe fn _mm_mask_srav_epi64(
19997    src: __m128i,
19998    k: __mmask8,
19999    a: __m128i,
20000    count: __m128i,
20001) -> __m128i {
20002    let shf = _mm_srav_epi64(a, count).as_i64x2();
20003    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20004}
20005
20006/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20007///
20008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
20009#[inline]
20010#[target_feature(enable = "avx512f,avx512vl")]
20011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20012#[cfg_attr(test, assert_instr(vpsravq))]
20013pub unsafe fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20014    let shf = _mm_srav_epi64(a, count).as_i64x2();
20015    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20016}
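
// Illustrative sketch (comment only): a zero-masked, per-lane arithmetic
// shift, assuming `avx512f` support and an `unsafe`, feature-enabled context:
//
//     let a = _mm512_set1_epi64(-1024);
//     let counts = _mm512_set1_epi64(5);
//     // Selected lanes become -1024 >> 5 = -32; the others are zeroed.
//     let r = _mm512_maskz_srav_epi64(0b0000_1111, a, counts);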
20017
20018/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20019///
20020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
20021#[inline]
20022#[target_feature(enable = "avx512f")]
20023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20024#[cfg_attr(test, assert_instr(vprolvd))]
20025pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
20026    transmute(vprolvd(a.as_i32x16(), b.as_i32x16()))
20027}
20028
20029/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20030///
20031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
20032#[inline]
20033#[target_feature(enable = "avx512f")]
20034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20035#[cfg_attr(test, assert_instr(vprolvd))]
20036pub unsafe fn _mm512_mask_rolv_epi32(
20037    src: __m512i,
20038    k: __mmask16,
20039    a: __m512i,
20040    b: __m512i,
20041) -> __m512i {
20042    let rol = _mm512_rolv_epi32(a, b).as_i32x16();
20043    transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
20044}
20045
20046/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20047///
20048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
20049#[inline]
20050#[target_feature(enable = "avx512f")]
20051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20052#[cfg_attr(test, assert_instr(vprolvd))]
20053pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
20054    let rol = _mm512_rolv_epi32(a, b).as_i32x16();
20055    transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
20056}
20057
20058/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20059///
20060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
20061#[inline]
20062#[target_feature(enable = "avx512f,avx512vl")]
20063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20064#[cfg_attr(test, assert_instr(vprolvd))]
20065pub unsafe fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
20066    transmute(vprolvd256(a.as_i32x8(), b.as_i32x8()))
20067}
20068
20069/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20070///
20071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
20072#[inline]
20073#[target_feature(enable = "avx512f,avx512vl")]
20074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20075#[cfg_attr(test, assert_instr(vprolvd))]
20076pub unsafe fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20077    let rol = _mm256_rolv_epi32(a, b).as_i32x8();
20078    transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
20079}
20080
20081/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20082///
20083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
20084#[inline]
20085#[target_feature(enable = "avx512f,avx512vl")]
20086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20087#[cfg_attr(test, assert_instr(vprolvd))]
20088pub unsafe fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20089    let rol = _mm256_rolv_epi32(a, b).as_i32x8();
20090    transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
20091}
20092
20093/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20094///
20095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
20096#[inline]
20097#[target_feature(enable = "avx512f,avx512vl")]
20098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20099#[cfg_attr(test, assert_instr(vprolvd))]
20100pub unsafe fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
20101    transmute(vprolvd128(a.as_i32x4(), b.as_i32x4()))
20102}
20103
20104/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20105///
20106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
20107#[inline]
20108#[target_feature(enable = "avx512f,avx512vl")]
20109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20110#[cfg_attr(test, assert_instr(vprolvd))]
20111pub unsafe fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20112    let rol = _mm_rolv_epi32(a, b).as_i32x4();
20113    transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
20114}
20115
20116/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20117///
20118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
20119#[inline]
20120#[target_feature(enable = "avx512f,avx512vl")]
20121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20122#[cfg_attr(test, assert_instr(vprolvd))]
20123pub unsafe fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20124    let rol = _mm_rolv_epi32(a, b).as_i32x4();
20125    transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
20126}
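
// Illustrative sketch (comment only): for the 32-bit variable rotates the
// count is effectively taken modulo 32. Assuming `avx512f`/`avx512vl`:
//
//     let a = _mm_set1_epi32(1);
//     let b = _mm_set1_epi32(31);
//     // Rotating 1 left by 31 moves the bit into the sign position.
//     let r = _mm_rolv_epi32(a, b); // each lane == i32::MIN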
20127
20128/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20129///
20130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
20131#[inline]
20132#[target_feature(enable = "avx512f")]
20133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20134#[cfg_attr(test, assert_instr(vprorvd))]
20135pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
20136    transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
20137}
20138
20139/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20140///
20141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
20142#[inline]
20143#[target_feature(enable = "avx512f")]
20144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20145#[cfg_attr(test, assert_instr(vprorvd))]
20146pub unsafe fn _mm512_mask_rorv_epi32(
20147    src: __m512i,
20148    k: __mmask16,
20149    a: __m512i,
20150    b: __m512i,
20151) -> __m512i {
20152    let ror = _mm512_rorv_epi32(a, b).as_i32x16();
20153    transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
20154}
20155
20156/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20157///
20158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
20159#[inline]
20160#[target_feature(enable = "avx512f")]
20161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20162#[cfg_attr(test, assert_instr(vprorvd))]
20163pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
20164    let ror = _mm512_rorv_epi32(a, b).as_i32x16();
20165    transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
20166}
20167
20168/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20169///
20170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
20171#[inline]
20172#[target_feature(enable = "avx512f,avx512vl")]
20173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20174#[cfg_attr(test, assert_instr(vprorvd))]
20175pub unsafe fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
20176    transmute(vprorvd256(a.as_i32x8(), b.as_i32x8()))
20177}
20178
20179/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20180///
20181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
20182#[inline]
20183#[target_feature(enable = "avx512f,avx512vl")]
20184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20185#[cfg_attr(test, assert_instr(vprorvd))]
20186pub unsafe fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20187    let ror = _mm256_rorv_epi32(a, b).as_i32x8();
20188    transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
20189}
20190
20191/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20192///
20193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
20194#[inline]
20195#[target_feature(enable = "avx512f,avx512vl")]
20196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20197#[cfg_attr(test, assert_instr(vprorvd))]
20198pub unsafe fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20199    let ror = _mm256_rorv_epi32(a, b).as_i32x8();
20200    transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
20201}
20202
20203/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20204///
20205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
20206#[inline]
20207#[target_feature(enable = "avx512f,avx512vl")]
20208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20209#[cfg_attr(test, assert_instr(vprorvd))]
20210pub unsafe fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
20211    transmute(vprorvd128(a.as_i32x4(), b.as_i32x4()))
20212}
20213
20214/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20215///
20216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
20217#[inline]
20218#[target_feature(enable = "avx512f,avx512vl")]
20219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20220#[cfg_attr(test, assert_instr(vprorvd))]
20221pub unsafe fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20222    let ror = _mm_rorv_epi32(a, b).as_i32x4();
20223    transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
20224}
20225
20226/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20227///
20228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
20229#[inline]
20230#[target_feature(enable = "avx512f,avx512vl")]
20231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20232#[cfg_attr(test, assert_instr(vprorvd))]
20233pub unsafe fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20234    let ror = _mm_rorv_epi32(a, b).as_i32x4();
20235    transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
20236}
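
// Illustrative sketch (comment only): rotating right by `n` is the same as
// rotating left by `32 - n`. Assuming `avx512f`/`avx512vl`:
//
//     let a = _mm256_set1_epi32(0x0000_00F0);
//     let b = _mm256_set1_epi32(4);
//     let r = _mm256_rorv_epi32(a, b); // each lane == 0x0000_000F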
20237
20238/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20239///
20240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
20241#[inline]
20242#[target_feature(enable = "avx512f")]
20243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20244#[cfg_attr(test, assert_instr(vprolvq))]
20245pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
20246    transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
20247}
20248
20249/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20250///
20251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
20252#[inline]
20253#[target_feature(enable = "avx512f")]
20254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20255#[cfg_attr(test, assert_instr(vprolvq))]
20256pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20257    let rol = _mm512_rolv_epi64(a, b).as_i64x8();
20258    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
20259}
20260
20261/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20262///
20263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
20264#[inline]
20265#[target_feature(enable = "avx512f")]
20266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20267#[cfg_attr(test, assert_instr(vprolvq))]
20268pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20269    let rol = _mm512_rolv_epi64(a, b).as_i64x8();
20270    transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
20271}
20272
20273/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20274///
20275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
20276#[inline]
20277#[target_feature(enable = "avx512f,avx512vl")]
20278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20279#[cfg_attr(test, assert_instr(vprolvq))]
20280pub unsafe fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
20281    transmute(vprolvq256(a.as_i64x4(), b.as_i64x4()))
20282}
20283
20284/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20285///
20286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
20287#[inline]
20288#[target_feature(enable = "avx512f,avx512vl")]
20289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20290#[cfg_attr(test, assert_instr(vprolvq))]
20291pub unsafe fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20292    let rol = _mm256_rolv_epi64(a, b).as_i64x4();
20293    transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
20294}
20295
20296/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20297///
20298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
20299#[inline]
20300#[target_feature(enable = "avx512f,avx512vl")]
20301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20302#[cfg_attr(test, assert_instr(vprolvq))]
20303pub unsafe fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20304    let rol = _mm256_rolv_epi64(a, b).as_i64x4();
20305    transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
20306}
20307
20308/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
20309///
20310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
20311#[inline]
20312#[target_feature(enable = "avx512f,avx512vl")]
20313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20314#[cfg_attr(test, assert_instr(vprolvq))]
20315pub unsafe fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
20316    transmute(vprolvq128(a.as_i64x2(), b.as_i64x2()))
20317}
20318
20319/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20320///
20321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
20322#[inline]
20323#[target_feature(enable = "avx512f,avx512vl")]
20324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20325#[cfg_attr(test, assert_instr(vprolvq))]
20326pub unsafe fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20327    let rol = _mm_rolv_epi64(a, b).as_i64x2();
20328    transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
20329}
20330
20331/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20332///
20333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
20334#[inline]
20335#[target_feature(enable = "avx512f,avx512vl")]
20336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20337#[cfg_attr(test, assert_instr(vprolvq))]
20338pub unsafe fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20339    let rol = _mm_rolv_epi64(a, b).as_i64x2();
20340    transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
20341}
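
// Illustrative sketch (comment only): a write-masked 64-bit variable rotate,
// assuming `avx512f` support and an `unsafe`, feature-enabled context:
//
//     let src = _mm512_set1_epi64(-1);
//     let a = _mm512_set1_epi64(1);
//     let b = _mm512_set1_epi64(8);
//     // Selected lanes become 1 rotated left by 8 = 256; the rest keep -1.
//     let r = _mm512_mask_rolv_epi64(src, 0b1111_0000, a, b);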
20342
20343/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20344///
20345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
20346#[inline]
20347#[target_feature(enable = "avx512f")]
20348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20349#[cfg_attr(test, assert_instr(vprorvq))]
20350pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
20351    transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
20352}
20353
20354/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20355///
20356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
20357#[inline]
20358#[target_feature(enable = "avx512f")]
20359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20360#[cfg_attr(test, assert_instr(vprorvq))]
20361pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20362    let ror = _mm512_rorv_epi64(a, b).as_i64x8();
20363    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
20364}
20365
20366/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20367///
20368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
20369#[inline]
20370#[target_feature(enable = "avx512f")]
20371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20372#[cfg_attr(test, assert_instr(vprorvq))]
20373pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
20374    let ror = _mm512_rorv_epi64(a, b).as_i64x8();
20375    transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
20376}
20377
20378/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20379///
20380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
20381#[inline]
20382#[target_feature(enable = "avx512f,avx512vl")]
20383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20384#[cfg_attr(test, assert_instr(vprorvq))]
20385pub unsafe fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
20386    transmute(vprorvq256(a.as_i64x4(), b.as_i64x4()))
20387}
20388
20389/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20390///
20391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
20392#[inline]
20393#[target_feature(enable = "avx512f,avx512vl")]
20394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20395#[cfg_attr(test, assert_instr(vprorvq))]
20396pub unsafe fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20397    let ror = _mm256_rorv_epi64(a, b).as_i64x4();
20398    transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
20399}
20400
20401/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20402///
20403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
20404#[inline]
20405#[target_feature(enable = "avx512f,avx512vl")]
20406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20407#[cfg_attr(test, assert_instr(vprorvq))]
20408pub unsafe fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
20409    let ror = _mm256_rorv_epi64(a, b).as_i64x4();
20410    transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
20411}
20412
20413/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
20414///
20415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
20416#[inline]
20417#[target_feature(enable = "avx512f,avx512vl")]
20418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20419#[cfg_attr(test, assert_instr(vprorvq))]
20420pub unsafe fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
20421    transmute(vprorvq128(a.as_i64x2(), b.as_i64x2()))
20422}
20423
20424/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20425///
20426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
20427#[inline]
20428#[target_feature(enable = "avx512f,avx512vl")]
20429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20430#[cfg_attr(test, assert_instr(vprorvq))]
20431pub unsafe fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20432    let ror = _mm_rorv_epi64(a, b).as_i64x2();
20433    transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
20434}
20435
20436/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20437///
20438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
20439#[inline]
20440#[target_feature(enable = "avx512f,avx512vl")]
20441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20442#[cfg_attr(test, assert_instr(vprorvq))]
20443pub unsafe fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
20444    let ror = _mm_rorv_epi64(a, b).as_i64x2();
20445    transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
20446}
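
// Illustrative sketch (comment only): rotation wraps bits around rather than
// discarding them. Assuming `avx512f`/`avx512vl`:
//
//     let a = _mm_set1_epi64x(1);
//     let b = _mm_set1_epi64x(1);
//     // Rotating 1 right by one bit wraps it into the sign position.
//     let r = _mm_rorv_epi64(a, b); // each lane == i64::MIN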
20447
20448/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20449///
20450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
20451#[inline]
20452#[target_feature(enable = "avx512f")]
20453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20454#[cfg_attr(test, assert_instr(vpsllvd))]
20455pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
20456    transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
20457}
20458
20459/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20460///
20461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
20462#[inline]
20463#[target_feature(enable = "avx512f")]
20464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20465#[cfg_attr(test, assert_instr(vpsllvd))]
20466pub unsafe fn _mm512_mask_sllv_epi32(
20467    src: __m512i,
20468    k: __mmask16,
20469    a: __m512i,
20470    count: __m512i,
20471) -> __m512i {
20472    let shf = _mm512_sllv_epi32(a, count).as_i32x16();
20473    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20474}
20475
20476/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20477///
20478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
20479#[inline]
20480#[target_feature(enable = "avx512f")]
20481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20482#[cfg_attr(test, assert_instr(vpsllvd))]
20483pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
20484    let shf = _mm512_sllv_epi32(a, count).as_i32x16();
20485    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20486}
20487
20488/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20489///
20490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
20491#[inline]
20492#[target_feature(enable = "avx512f,avx512vl")]
20493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20494#[cfg_attr(test, assert_instr(vpsllvd))]
20495pub unsafe fn _mm256_mask_sllv_epi32(
20496    src: __m256i,
20497    k: __mmask8,
20498    a: __m256i,
20499    count: __m256i,
20500) -> __m256i {
20501    let shf = _mm256_sllv_epi32(a, count).as_i32x8();
20502    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20503}
20504
20505/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20506///
20507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
20508#[inline]
20509#[target_feature(enable = "avx512f,avx512vl")]
20510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20511#[cfg_attr(test, assert_instr(vpsllvd))]
20512pub unsafe fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
20513    let shf = _mm256_sllv_epi32(a, count).as_i32x8();
20514    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20515}
20516
20517/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20518///
20519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
20520#[inline]
20521#[target_feature(enable = "avx512f,avx512vl")]
20522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20523#[cfg_attr(test, assert_instr(vpsllvd))]
20524pub unsafe fn _mm_mask_sllv_epi32(
20525    src: __m128i,
20526    k: __mmask8,
20527    a: __m128i,
20528    count: __m128i,
20529) -> __m128i {
20530    let shf = _mm_sllv_epi32(a, count).as_i32x4();
20531    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20532}
20533
20534/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20535///
20536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
20537#[inline]
20538#[target_feature(enable = "avx512f,avx512vl")]
20539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20540#[cfg_attr(test, assert_instr(vpsllvd))]
20541pub unsafe fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20542    let shf = _mm_sllv_epi32(a, count).as_i32x4();
20543    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20544}
20545
20546/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20547///
20548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
20549#[inline]
20550#[target_feature(enable = "avx512f")]
20551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20552#[cfg_attr(test, assert_instr(vpsrlvd))]
20553pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
20554    transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
20555}
20556
20557/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20558///
20559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
20560#[inline]
20561#[target_feature(enable = "avx512f")]
20562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20563#[cfg_attr(test, assert_instr(vpsrlvd))]
20564pub unsafe fn _mm512_mask_srlv_epi32(
20565    src: __m512i,
20566    k: __mmask16,
20567    a: __m512i,
20568    count: __m512i,
20569) -> __m512i {
20570    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
20571    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20572}
20573
20574/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20575///
20576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
20577#[inline]
20578#[target_feature(enable = "avx512f")]
20579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20580#[cfg_attr(test, assert_instr(vpsrlvd))]
20581pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
20582    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
20583    transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20584}
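
// Illustrative sketch (hypothetical values, assuming `avx512f`): the zeromask
// variant of the variable logical right shift. A per-lane count of 32 or more
// clears that lane, and masked-off lanes are zeroed as well.
//
//     let a = _mm512_set1_epi32(i32::MIN); // 0x8000_0000 in every lane
//     let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 31, 32, 33, 0, 0, 0, 0, 0);
//     let r = _mm512_maskz_srlv_epi32(0b0000_0111_1111_1111, a, count);
//     // lanes 0..=8 hold 0x8000_0000 shifted logically (lane 8 is 1); lanes 9
//     // and 10 are 0 because their counts are >= 32; lanes 11..=15 are zeroed
//     // by the mask.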
20585
20586/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20587///
20588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
20589#[inline]
20590#[target_feature(enable = "avx512f,avx512vl")]
20591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20592#[cfg_attr(test, assert_instr(vpsrlvd))]
20593pub unsafe fn _mm256_mask_srlv_epi32(
20594    src: __m256i,
20595    k: __mmask8,
20596    a: __m256i,
20597    count: __m256i,
20598) -> __m256i {
20599    let shf = _mm256_srlv_epi32(a, count).as_i32x8();
20600    transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20601}
20602
20603/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20604///
20605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
20606#[inline]
20607#[target_feature(enable = "avx512f,avx512vl")]
20608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20609#[cfg_attr(test, assert_instr(vpsrlvd))]
20610pub unsafe fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
20611    let shf = _mm256_srlv_epi32(a, count).as_i32x8();
20612    transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20613}
20614
20615/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20616///
20617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
20618#[inline]
20619#[target_feature(enable = "avx512f,avx512vl")]
20620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20621#[cfg_attr(test, assert_instr(vpsrlvd))]
20622pub unsafe fn _mm_mask_srlv_epi32(
20623    src: __m128i,
20624    k: __mmask8,
20625    a: __m128i,
20626    count: __m128i,
20627) -> __m128i {
20628    let shf = _mm_srlv_epi32(a, count).as_i32x4();
20629    transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20630}
20631
20632/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20633///
20634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
20635#[inline]
20636#[target_feature(enable = "avx512f,avx512vl")]
20637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20638#[cfg_attr(test, assert_instr(vpsrlvd))]
20639pub unsafe fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20640    let shf = _mm_srlv_epi32(a, count).as_i32x4();
20641    transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20642}
20643
20644/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20645///
20646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
20647#[inline]
20648#[target_feature(enable = "avx512f")]
20649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20650#[cfg_attr(test, assert_instr(vpsllvq))]
20651pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
20652    transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
20653}
20654
20655/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20656///
20657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
20658#[inline]
20659#[target_feature(enable = "avx512f")]
20660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20661#[cfg_attr(test, assert_instr(vpsllvq))]
20662pub unsafe fn _mm512_mask_sllv_epi64(
20663    src: __m512i,
20664    k: __mmask8,
20665    a: __m512i,
20666    count: __m512i,
20667) -> __m512i {
20668    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
20669    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20670}
20671
20672/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20673///
20674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
20675#[inline]
20676#[target_feature(enable = "avx512f")]
20677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20678#[cfg_attr(test, assert_instr(vpsllvq))]
20679pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
20680    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
20681    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20682}
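
// Illustrative sketch (hypothetical values): the 64-bit element form reads the
// shift amount from the full 64-bit lane, and a count of 64 or more clears the
// lane.
//
//     let a = _mm512_set1_epi64(1);
//     let count = _mm512_setr_epi64(0, 8, 16, 32, 48, 63, 64, 65);
//     let r = _mm512_sllv_epi64(a, count);
//     // lanes 0..=5 hold 1 << count; lanes 6 and 7 are 0 (count >= 64).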
20683
20684/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20685///
20686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
20687#[inline]
20688#[target_feature(enable = "avx512f,avx512vl")]
20689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20690#[cfg_attr(test, assert_instr(vpsllvq))]
20691pub unsafe fn _mm256_mask_sllv_epi64(
20692    src: __m256i,
20693    k: __mmask8,
20694    a: __m256i,
20695    count: __m256i,
20696) -> __m256i {
20697    let shf = _mm256_sllv_epi64(a, count).as_i64x4();
20698    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20699}
20700
20701/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20702///
20703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
20704#[inline]
20705#[target_feature(enable = "avx512f,avx512vl")]
20706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20707#[cfg_attr(test, assert_instr(vpsllvq))]
20708pub unsafe fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
20709    let shf = _mm256_sllv_epi64(a, count).as_i64x4();
20710    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20711}
20712
20713/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20714///
20715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
20716#[inline]
20717#[target_feature(enable = "avx512f,avx512vl")]
20718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20719#[cfg_attr(test, assert_instr(vpsllvq))]
20720pub unsafe fn _mm_mask_sllv_epi64(
20721    src: __m128i,
20722    k: __mmask8,
20723    a: __m128i,
20724    count: __m128i,
20725) -> __m128i {
20726    let shf = _mm_sllv_epi64(a, count).as_i64x2();
20727    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20728}
20729
20730/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20731///
20732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
20733#[inline]
20734#[target_feature(enable = "avx512f,avx512vl")]
20735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20736#[cfg_attr(test, assert_instr(vpsllvq))]
20737pub unsafe fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20738    let shf = _mm_sllv_epi64(a, count).as_i64x2();
20739    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20740}
20741
20742/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
20743///
20744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
20745#[inline]
20746#[target_feature(enable = "avx512f")]
20747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20748#[cfg_attr(test, assert_instr(vpsrlvq))]
20749pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
20750    transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
20751}
20752
20753/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20754///
20755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
20756#[inline]
20757#[target_feature(enable = "avx512f")]
20758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20759#[cfg_attr(test, assert_instr(vpsrlvq))]
20760pub unsafe fn _mm512_mask_srlv_epi64(
20761    src: __m512i,
20762    k: __mmask8,
20763    a: __m512i,
20764    count: __m512i,
20765) -> __m512i {
20766    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
20767    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20768}
20769
20770/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20771///
20772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
20773#[inline]
20774#[target_feature(enable = "avx512f")]
20775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20776#[cfg_attr(test, assert_instr(vpsrlvq))]
20777pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
20778    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
20779    transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20780}
20781
20782/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20783///
20784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
20785#[inline]
20786#[target_feature(enable = "avx512f,avx512vl")]
20787#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20788#[cfg_attr(test, assert_instr(vpsrlvq))]
20789pub unsafe fn _mm256_mask_srlv_epi64(
20790    src: __m256i,
20791    k: __mmask8,
20792    a: __m256i,
20793    count: __m256i,
20794) -> __m256i {
20795    let shf = _mm256_srlv_epi64(a, count).as_i64x4();
20796    transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20797}
20798
20799/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20800///
20801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
20802#[inline]
20803#[target_feature(enable = "avx512f,avx512vl")]
20804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20805#[cfg_attr(test, assert_instr(vpsrlvq))]
20806pub unsafe fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
20807    let shf = _mm256_srlv_epi64(a, count).as_i64x4();
20808    transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20809}
20810
20811/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20812///
20813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
20814#[inline]
20815#[target_feature(enable = "avx512f,avx512vl")]
20816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20817#[cfg_attr(test, assert_instr(vpsrlvq))]
20818pub unsafe fn _mm_mask_srlv_epi64(
20819    src: __m128i,
20820    k: __mmask8,
20821    a: __m128i,
20822    count: __m128i,
20823) -> __m128i {
20824    let shf = _mm_srlv_epi64(a, count).as_i64x2();
20825    transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20826}
20827
20828/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20829///
20830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
20831#[inline]
20832#[target_feature(enable = "avx512f,avx512vl")]
20833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20834#[cfg_attr(test, assert_instr(vpsrlvq))]
20835pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20836    let shf = _mm_srlv_epi64(a, count).as_i64x2();
20837    transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20838}
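
// Illustrative sketch (hypothetical values): the 128-bit `avx512vl` form with a
// zeromask; only the low two mask bits matter for a two-lane vector.
//
//     let a = _mm_set_epi64x(0x100, 0x100);
//     let count = _mm_set_epi64x(4, 8);
//     let r = _mm_maskz_srlv_epi64(0b01, a, count);
//     // lane 0 holds 0x100 >> 8 = 1; lane 1 is zeroed because its mask bit is clear.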
20839
20840/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
20841///
20842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
20843#[inline]
20844#[target_feature(enable = "avx512f")]
20845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20846#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20847#[rustc_legacy_const_generics(1)]
20848pub unsafe fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
20849    static_assert_uimm_bits!(MASK, 8);
20850    simd_shuffle!(
20851        a,
20852        a,
20853        [
20854            MASK as u32 & 0b11,
20855            (MASK as u32 >> 2) & 0b11,
20856            ((MASK as u32 >> 4) & 0b11),
20857            ((MASK as u32 >> 6) & 0b11),
20858            (MASK as u32 & 0b11) + 4,
20859            ((MASK as u32 >> 2) & 0b11) + 4,
20860            ((MASK as u32 >> 4) & 0b11) + 4,
20861            ((MASK as u32 >> 6) & 0b11) + 4,
20862            (MASK as u32 & 0b11) + 8,
20863            ((MASK as u32 >> 2) & 0b11) + 8,
20864            ((MASK as u32 >> 4) & 0b11) + 8,
20865            ((MASK as u32 >> 6) & 0b11) + 8,
20866            (MASK as u32 & 0b11) + 12,
20867            ((MASK as u32 >> 2) & 0b11) + 12,
20868            ((MASK as u32 >> 4) & 0b11) + 12,
20869            ((MASK as u32 >> 6) & 0b11) + 12,
20870        ],
20871    )
20872}
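
// Illustrative sketch (hypothetical values): the imm8 control is four 2-bit
// source selectors that are applied identically inside each 128-bit lane.
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     // 0b00_01_10_11 picks elements 3, 2, 1, 0 of every 128-bit lane.
//     let r = _mm512_permute_ps::<0b00_01_10_11>(a);
//     // r = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12] (element values of `a`)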
20873
20874/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20875///
20876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
20877#[inline]
20878#[target_feature(enable = "avx512f")]
20879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20880#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20881#[rustc_legacy_const_generics(3)]
20882pub unsafe fn _mm512_mask_permute_ps<const MASK: i32>(
20883    src: __m512,
20884    k: __mmask16,
20885    a: __m512,
20886) -> __m512 {
20887    static_assert_uimm_bits!(MASK, 8);
20888    let r = _mm512_permute_ps::<MASK>(a);
20889    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
20890}
20891
20892/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20893///
20894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
20895#[inline]
20896#[target_feature(enable = "avx512f")]
20897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20898#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20899#[rustc_legacy_const_generics(2)]
20900pub unsafe fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
20901    static_assert_uimm_bits!(MASK, 8);
20902    let r = _mm512_permute_ps::<MASK>(a);
20903    transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
20904}
20905
20906/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20907///
20908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
20909#[inline]
20910#[target_feature(enable = "avx512f,avx512vl")]
20911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20912#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20913#[rustc_legacy_const_generics(3)]
20914pub unsafe fn _mm256_mask_permute_ps<const MASK: i32>(
20915    src: __m256,
20916    k: __mmask8,
20917    a: __m256,
20918) -> __m256 {
20919    let r = _mm256_permute_ps::<MASK>(a);
20920    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
20921}
20922
20923/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20924///
20925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
20926#[inline]
20927#[target_feature(enable = "avx512f,avx512vl")]
20928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20929#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20930#[rustc_legacy_const_generics(2)]
20931pub unsafe fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
20932    let r = _mm256_permute_ps::<MASK>(a);
20933    transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
20934}
20935
20936/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20937///
20938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
20939#[inline]
20940#[target_feature(enable = "avx512f,avx512vl")]
20941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20942#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20943#[rustc_legacy_const_generics(3)]
20944pub unsafe fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
20945    let r = _mm_permute_ps::<MASK>(a);
20946    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
20947}
20948
20949/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20950///
20951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
20952#[inline]
20953#[target_feature(enable = "avx512f,avx512vl")]
20954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20955#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
20956#[rustc_legacy_const_generics(2)]
20957pub unsafe fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
20958    let r = _mm_permute_ps::<MASK>(a);
20959    transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
20960}
20961
20962/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
20963///
20964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
20965#[inline]
20966#[target_feature(enable = "avx512f")]
20967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20968#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
20969#[rustc_legacy_const_generics(1)]
20970pub unsafe fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
20971    static_assert_uimm_bits!(MASK, 8);
20972    simd_shuffle!(
20973        a,
20974        a,
20975        [
20976            MASK as u32 & 0b1,
20977            ((MASK as u32 >> 1) & 0b1),
20978            ((MASK as u32 >> 2) & 0b1) + 2,
20979            ((MASK as u32 >> 3) & 0b1) + 2,
20980            ((MASK as u32 >> 4) & 0b1) + 4,
20981            ((MASK as u32 >> 5) & 0b1) + 4,
20982            ((MASK as u32 >> 6) & 0b1) + 6,
20983            ((MASK as u32 >> 7) & 0b1) + 6,
20984        ],
20985    )
20986}
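
// Illustrative sketch (hypothetical values): in the `pd` form each destination
// element consumes a single control bit, so two bits of imm8 drive each 128-bit
// lane.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     // 0b01 per lane swaps the pair, so 0b01_01_01_01 swaps every pair.
//     let r = _mm512_permute_pd::<0b01_01_01_01>(a);
//     // r = [1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0]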
20987
20988/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20989///
20990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
20991#[inline]
20992#[target_feature(enable = "avx512f")]
20993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20994#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
20995#[rustc_legacy_const_generics(3)]
20996pub unsafe fn _mm512_mask_permute_pd<const MASK: i32>(
20997    src: __m512d,
20998    k: __mmask8,
20999    a: __m512d,
21000) -> __m512d {
21001    static_assert_uimm_bits!(MASK, 8);
21002    let r = _mm512_permute_pd::<MASK>(a);
21003    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
21004}
21005
21006/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21007///
21008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
21009#[inline]
21010#[target_feature(enable = "avx512f")]
21011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21012#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
21013#[rustc_legacy_const_generics(2)]
21014pub unsafe fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
21015    static_assert_uimm_bits!(MASK, 8);
21016    let r = _mm512_permute_pd::<MASK>(a);
21017    transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
21018}
21019
21020/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21021///
21022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
21023#[inline]
21024#[target_feature(enable = "avx512f,avx512vl")]
21025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21026#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
21027#[rustc_legacy_const_generics(3)]
21028pub unsafe fn _mm256_mask_permute_pd<const MASK: i32>(
21029    src: __m256d,
21030    k: __mmask8,
21031    a: __m256d,
21032) -> __m256d {
21033    static_assert_uimm_bits!(MASK, 4);
21034    let r = _mm256_permute_pd::<MASK>(a);
21035    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
21036}
21037
21038/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21039///
21040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
21041#[inline]
21042#[target_feature(enable = "avx512f,avx512vl")]
21043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21044#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
21045#[rustc_legacy_const_generics(2)]
21046pub unsafe fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
21047    static_assert_uimm_bits!(MASK, 4);
21048    let r = _mm256_permute_pd::<MASK>(a);
21049    transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
21050}
21051
21052/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21053///
21054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
21055#[inline]
21056#[target_feature(enable = "avx512f,avx512vl")]
21057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21058#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
21059#[rustc_legacy_const_generics(3)]
21060pub unsafe fn _mm_mask_permute_pd<const IMM2: i32>(
21061    src: __m128d,
21062    k: __mmask8,
21063    a: __m128d,
21064) -> __m128d {
21065    static_assert_uimm_bits!(IMM2, 2);
21066    let r = _mm_permute_pd::<IMM2>(a);
21067    transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
21068}
21069
21070/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21071///
21072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
21073#[inline]
21074#[target_feature(enable = "avx512f,avx512vl")]
21075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21076#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
21077#[rustc_legacy_const_generics(2)]
21078pub unsafe fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
21079    static_assert_uimm_bits!(IMM2, 2);
21080    let r = _mm_permute_pd::<IMM2>(a);
21081    transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
21082}
21083
21084/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
21085///
21086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
21087#[inline]
21088#[target_feature(enable = "avx512f")]
21089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21090#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21091#[rustc_legacy_const_generics(1)]
21092pub unsafe fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
21093    static_assert_uimm_bits!(MASK, 8);
21094    simd_shuffle!(
21095        a,
21096        a,
21097        [
21098            MASK as u32 & 0b11,
21099            (MASK as u32 >> 2) & 0b11,
21100            ((MASK as u32 >> 4) & 0b11),
21101            ((MASK as u32 >> 6) & 0b11),
21102            (MASK as u32 & 0b11) + 4,
21103            ((MASK as u32 >> 2) & 0b11) + 4,
21104            ((MASK as u32 >> 4) & 0b11) + 4,
21105            ((MASK as u32 >> 6) & 0b11) + 4,
21106        ],
21107    )
21108}
21109
21110/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21111///
21112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
21113#[inline]
21114#[target_feature(enable = "avx512f")]
21115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21116#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21117#[rustc_legacy_const_generics(3)]
21118pub unsafe fn _mm512_mask_permutex_epi64<const MASK: i32>(
21119    src: __m512i,
21120    k: __mmask8,
21121    a: __m512i,
21122) -> __m512i {
21123    static_assert_uimm_bits!(MASK, 8);
21124    let r = _mm512_permutex_epi64::<MASK>(a);
21125    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
21126}
21127
21128/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21129///
21130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
21131#[inline]
21132#[target_feature(enable = "avx512f")]
21133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21134#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21135#[rustc_legacy_const_generics(2)]
21136pub unsafe fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
21137    static_assert_uimm_bits!(MASK, 8);
21138    let r = _mm512_permutex_epi64::<MASK>(a);
21139    transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
21140}
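
// Illustrative sketch (hypothetical values): `permutex` reuses the same four
// 2-bit selectors for each 256-bit half, so elements can cross 128-bit lanes
// but not 256-bit lanes.
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     // 0b00_01_10_11 reverses the four elements of each 256-bit lane.
//     let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
//     // r = [3, 2, 1, 0, 7, 6, 5, 4]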
21141
21142/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
21143///
21144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
21145#[inline]
21146#[target_feature(enable = "avx512f,avx512vl")]
21147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21148#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21149#[rustc_legacy_const_generics(1)]
21150pub unsafe fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
21151    static_assert_uimm_bits!(MASK, 8);
21152    simd_shuffle!(
21153        a,
21154        a,
21155        [
21156            MASK as u32 & 0b11,
21157            (MASK as u32 >> 2) & 0b11,
21158            ((MASK as u32 >> 4) & 0b11),
21159            ((MASK as u32 >> 6) & 0b11),
21160        ],
21161    )
21162}
21163
21164/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21165///
21166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
21167#[inline]
21168#[target_feature(enable = "avx512f,avx512vl")]
21169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21170#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21171#[rustc_legacy_const_generics(3)]
21172pub unsafe fn _mm256_mask_permutex_epi64<const MASK: i32>(
21173    src: __m256i,
21174    k: __mmask8,
21175    a: __m256i,
21176) -> __m256i {
21177    static_assert_uimm_bits!(MASK, 8);
21178    let r = _mm256_permutex_epi64::<MASK>(a);
21179    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
21180}
21181
21182/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21183///
21184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
21185#[inline]
21186#[target_feature(enable = "avx512f,avx512vl")]
21187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21188#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
21189#[rustc_legacy_const_generics(2)]
21190pub unsafe fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
21191    static_assert_uimm_bits!(MASK, 8);
21192    let r = _mm256_permutex_epi64::<MASK>(a);
21193    transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
21194}
21195
21196/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
21197///
21198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
21199#[inline]
21200#[target_feature(enable = "avx512f")]
21201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21202#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21203#[rustc_legacy_const_generics(1)]
21204pub unsafe fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
21205    static_assert_uimm_bits!(MASK, 8);
21206    simd_shuffle!(
21207        a,
21208        a,
21209        [
21210            MASK as u32 & 0b11,
21211            (MASK as u32 >> 2) & 0b11,
21212            ((MASK as u32 >> 4) & 0b11),
21213            ((MASK as u32 >> 6) & 0b11),
21214            (MASK as u32 & 0b11) + 4,
21215            ((MASK as u32 >> 2) & 0b11) + 4,
21216            ((MASK as u32 >> 4) & 0b11) + 4,
21217            ((MASK as u32 >> 6) & 0b11) + 4,
21218        ],
21219    )
21220}
21221
21222/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21223///
21224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
21225#[inline]
21226#[target_feature(enable = "avx512f")]
21227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21228#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21229#[rustc_legacy_const_generics(3)]
21230pub unsafe fn _mm512_mask_permutex_pd<const MASK: i32>(
21231    src: __m512d,
21232    k: __mmask8,
21233    a: __m512d,
21234) -> __m512d {
21235    let r = _mm512_permutex_pd::<MASK>(a);
21236    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
21237}
21238
21239/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21240///
21241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
21242#[inline]
21243#[target_feature(enable = "avx512f")]
21244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21245#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21246#[rustc_legacy_const_generics(2)]
21247pub unsafe fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
21248    let r = _mm512_permutex_pd::<MASK>(a);
21249    transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
21250}
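
// Illustrative sketch (hypothetical values): the floating-point form composed
// with a zeromask; the control below broadcasts element 3 of each 256-bit lane.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0b0000_1111, a);
//     // lanes 0..=3 hold 3.0; lanes 4..=7 (which would hold 7.0) are zeroed by the mask.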
21251
21252/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
21253///
21254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
21255#[inline]
21256#[target_feature(enable = "avx512f,avx512vl")]
21257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21258#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21259#[rustc_legacy_const_generics(1)]
21260pub unsafe fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
21261    static_assert_uimm_bits!(MASK, 8);
21262    simd_shuffle!(
21263        a,
21264        a,
21265        [
21266            MASK as u32 & 0b11,
21267            (MASK as u32 >> 2) & 0b11,
21268            ((MASK as u32 >> 4) & 0b11),
21269            ((MASK as u32 >> 6) & 0b11),
21270        ],
21271    )
21272}
21273
21274/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21275///
21276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
21277#[inline]
21278#[target_feature(enable = "avx512f,avx512vl")]
21279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21280#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21281#[rustc_legacy_const_generics(3)]
21282pub unsafe fn _mm256_mask_permutex_pd<const MASK: i32>(
21283    src: __m256d,
21284    k: __mmask8,
21285    a: __m256d,
21286) -> __m256d {
21287    static_assert_uimm_bits!(MASK, 8);
21288    let r = _mm256_permutex_pd::<MASK>(a);
21289    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
21290}
21291
21292/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21293///
21294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
21295#[inline]
21296#[target_feature(enable = "avx512f,avx512vl")]
21297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21298#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
21299#[rustc_legacy_const_generics(2)]
21300pub unsafe fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
21301    static_assert_uimm_bits!(MASK, 8);
21302    let r = _mm256_permutex_pd::<MASK>(a);
21303    transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
21304}
21305
21306/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
21307///
21308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
21309#[inline]
21310#[target_feature(enable = "avx512f")]
21311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21312#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
21313pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
21314    transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
21315}
21316
21317/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
21318///
21319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
21320#[inline]
21321#[target_feature(enable = "avx512f")]
21322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21323#[cfg_attr(test, assert_instr(vpermd))]
21324pub unsafe fn _mm512_mask_permutevar_epi32(
21325    src: __m512i,
21326    k: __mmask16,
21327    idx: __m512i,
21328    a: __m512i,
21329) -> __m512i {
21330    let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
21331    transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
21332}
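
// Illustrative sketch (hypothetical values): despite the `permutevar` name, this
// operation indexes across the whole 512-bit register and is the same shuffle as
// `_mm512_permutexvar_epi32` below; note that the index vector is the first argument.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_set1_epi32(5);
//     let r = _mm512_permutevar_epi32(idx, a);
//     // every lane of r holds 5 (element 5 of `a` broadcast).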
21333
21334/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
21335///
21336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
21337#[inline]
21338#[target_feature(enable = "avx512f")]
21339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21340#[cfg_attr(test, assert_instr(vpermilps))]
21341pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
21342    transmute(vpermilps(a.as_f32x16(), b.as_i32x16()))
21343}
21344
21345/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21346///
21347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
21348#[inline]
21349#[target_feature(enable = "avx512f")]
21350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21351#[cfg_attr(test, assert_instr(vpermilps))]
21352pub unsafe fn _mm512_mask_permutevar_ps(
21353    src: __m512,
21354    k: __mmask16,
21355    a: __m512,
21356    b: __m512i,
21357) -> __m512 {
21358    let permute = _mm512_permutevar_ps(a, b).as_f32x16();
21359    transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
21360}
21361
21362/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21363///
21364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
21365#[inline]
21366#[target_feature(enable = "avx512f")]
21367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21368#[cfg_attr(test, assert_instr(vpermilps))]
21369pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
21370    let permute = _mm512_permutevar_ps(a, b).as_f32x16();
21371    transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
21372}
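
// Illustrative sketch (hypothetical values): unlike `_mm512_permute_ps`, the
// selectors live in a vector, so they may differ per element; only the low two
// bits of each 32-bit control are used, and selection stays within the element's
// own 128-bit lane.
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let b = _mm512_setr_epi32(3, 3, 0, 0, 1, 1, 2, 2, 0, 1, 2, 3, 3, 2, 1, 0);
//     let r = _mm512_permutevar_ps(a, b);
//     // r = [3, 3, 0, 0, 5, 5, 6, 6, 8, 9, 10, 11, 15, 14, 13, 12] (element values of `a`)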
21373
21374/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21375///
21376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
21377#[inline]
21378#[target_feature(enable = "avx512f,avx512vl")]
21379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21380#[cfg_attr(test, assert_instr(vpermilps))]
21381pub unsafe fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
21382    let permute = _mm256_permutevar_ps(a, b).as_f32x8();
21383    transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
21384}
21385
21386/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21387///
21388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
21389#[inline]
21390#[target_feature(enable = "avx512f,avx512vl")]
21391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21392#[cfg_attr(test, assert_instr(vpermilps))]
21393pub unsafe fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
21394    let permute = _mm256_permutevar_ps(a, b).as_f32x8();
21395    transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
21396}
21397
21398/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21399///
21400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
21401#[inline]
21402#[target_feature(enable = "avx512f,avx512vl")]
21403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21404#[cfg_attr(test, assert_instr(vpermilps))]
21405pub unsafe fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
21406    let permute = _mm_permutevar_ps(a, b).as_f32x4();
21407    transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
21408}
21409
21410/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21411///
21412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
21413#[inline]
21414#[target_feature(enable = "avx512f,avx512vl")]
21415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21416#[cfg_attr(test, assert_instr(vpermilps))]
21417pub unsafe fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
21418    let permute = _mm_permutevar_ps(a, b).as_f32x4();
21419    transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
21420}
21421
21422/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
21423///
21424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
21425#[inline]
21426#[target_feature(enable = "avx512f")]
21427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21428#[cfg_attr(test, assert_instr(vpermilpd))]
21429pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
21430    transmute(vpermilpd(a.as_f64x8(), b.as_i64x8()))
21431}
21432
21433/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21434///
21435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
21436#[inline]
21437#[target_feature(enable = "avx512f")]
21438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21439#[cfg_attr(test, assert_instr(vpermilpd))]
21440pub unsafe fn _mm512_mask_permutevar_pd(
21441    src: __m512d,
21442    k: __mmask8,
21443    a: __m512d,
21444    b: __m512i,
21445) -> __m512d {
21446    let permute = _mm512_permutevar_pd(a, b).as_f64x8();
21447    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
21448}
21449
21450/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21451///
21452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
21453#[inline]
21454#[target_feature(enable = "avx512f")]
21455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21456#[cfg_attr(test, assert_instr(vpermilpd))]
21457pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
21458    let permute = _mm512_permutevar_pd(a, b).as_f64x8();
21459    transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
21460}
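
// Illustrative sketch (hypothetical values): in the variable `pd` form the
// selector is bit 1 (not bit 0) of each 64-bit control element, choosing between
// the two elements of that 128-bit lane.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let b = _mm512_set1_epi64(0b10); // bit 1 set: pick the high element of each pair
//     let r = _mm512_permutevar_pd(a, b);
//     // r = [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0]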
21461
21462/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21463///
21464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
21465#[inline]
21466#[target_feature(enable = "avx512f,avx512vl")]
21467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21468#[cfg_attr(test, assert_instr(vpermilpd))]
21469pub unsafe fn _mm256_mask_permutevar_pd(
21470    src: __m256d,
21471    k: __mmask8,
21472    a: __m256d,
21473    b: __m256i,
21474) -> __m256d {
21475    let permute = _mm256_permutevar_pd(a, b).as_f64x4();
21476    transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
21477}
21478
21479/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21480///
21481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
21482#[inline]
21483#[target_feature(enable = "avx512f,avx512vl")]
21484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21485#[cfg_attr(test, assert_instr(vpermilpd))]
21486pub unsafe fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
21487    let permute = _mm256_permutevar_pd(a, b).as_f64x4();
21488    transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
21489}
21490
21491/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21492///
21493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
21494#[inline]
21495#[target_feature(enable = "avx512f,avx512vl")]
21496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21497#[cfg_attr(test, assert_instr(vpermilpd))]
21498pub unsafe fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
21499    let permute = _mm_permutevar_pd(a, b).as_f64x2();
21500    transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
21501}
21502
21503/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21504///
21505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
21506#[inline]
21507#[target_feature(enable = "avx512f,avx512vl")]
21508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21509#[cfg_attr(test, assert_instr(vpermilpd))]
21510pub unsafe fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
21511    let permute = _mm_permutevar_pd(a, b).as_f64x2();
21512    transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
21513}
21514
21515/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21516///
21517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
21518#[inline]
21519#[target_feature(enable = "avx512f")]
21520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21521#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
21522pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
21523    transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
21524}
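
// Illustrative sketch (not part of the upstream source): each destination lane i takes
// a[idx[i] & 15], so an index vector of 15..=0 reverses the 16 lanes. Assuming an
// `avx512f`-enabled unsafe context:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi32(idx, a);
//     // r == [15, 14, 13, ..., 1, 0]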
21525
21526/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21527///
21528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
21529#[inline]
21530#[target_feature(enable = "avx512f")]
21531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21532#[cfg_attr(test, assert_instr(vpermd))]
21533pub unsafe fn _mm512_mask_permutexvar_epi32(
21534    src: __m512i,
21535    k: __mmask16,
21536    idx: __m512i,
21537    a: __m512i,
21538) -> __m512i {
21539    let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
21540    transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
21541}
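
// Illustrative sketch (not part of the upstream source): the writemask keeps the permuted
// value only where the corresponding bit of `k` is 1; all other lanes are copied from
// `src`. With `a` and `idx` as in the sketch above:
//
//     let src = _mm512_set1_epi32(-1);
//     let r = _mm512_mask_permutexvar_epi32(src, 0b0000_0000_1111_1111, idx, a);
//     // lanes 0..8 hold the permuted values, lanes 8..16 hold -1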
21542
21543/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21544///
21545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
21546#[inline]
21547#[target_feature(enable = "avx512f")]
21548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21549#[cfg_attr(test, assert_instr(vpermd))]
21550pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
21551    let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
21552    transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
21553}
21554
21555/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21556///
21557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
21558#[inline]
21559#[target_feature(enable = "avx512f,avx512vl")]
21560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21561#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
21562pub unsafe fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
21563    _mm256_permutevar8x32_epi32(a, idx) // LLVM lowers this to llvm.x86.avx2.permd
21564}
21565
21566/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21567///
21568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
21569#[inline]
21570#[target_feature(enable = "avx512f,avx512vl")]
21571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21572#[cfg_attr(test, assert_instr(vpermd))]
21573pub unsafe fn _mm256_mask_permutexvar_epi32(
21574    src: __m256i,
21575    k: __mmask8,
21576    idx: __m256i,
21577    a: __m256i,
21578) -> __m256i {
21579    let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
21580    transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
21581}
21582
21583/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21584///
21585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
21586#[inline]
21587#[target_feature(enable = "avx512f,avx512vl")]
21588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21589#[cfg_attr(test, assert_instr(vpermd))]
21590pub unsafe fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
21591    let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
21592    transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
21593}
21594
21595/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21596///
21597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
21598#[inline]
21599#[target_feature(enable = "avx512f")]
21600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21601#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
21602pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
21603    transmute(vpermq(a.as_i64x8(), idx.as_i64x8()))
21604}
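
// Illustrative sketch (not part of the upstream source): destination lane i takes
// a[idx[i] & 7]; only the low three bits of each 64-bit index element are used.
//
//     let a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
//     let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi64(idx, a);
//     // r == [17, 16, 15, 14, 13, 12, 11, 10]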
21605
21606/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21607///
21608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
21609#[inline]
21610#[target_feature(enable = "avx512f")]
21611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21612#[cfg_attr(test, assert_instr(vpermq))]
21613pub unsafe fn _mm512_mask_permutexvar_epi64(
21614    src: __m512i,
21615    k: __mmask8,
21616    idx: __m512i,
21617    a: __m512i,
21618) -> __m512i {
21619    let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
21620    transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
21621}
21622
21623/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21624///
21625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
21626#[inline]
21627#[target_feature(enable = "avx512f")]
21628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21629#[cfg_attr(test, assert_instr(vpermq))]
21630pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
21631    let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
21632    transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
21633}
21634
21635/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
21636///
21637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
21638#[inline]
21639#[target_feature(enable = "avx512f,avx512vl")]
21640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21641#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
21642pub unsafe fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
21643    transmute(vpermq256(a.as_i64x4(), idx.as_i64x4()))
21644}
21645
21646/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21647///
21648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
21649#[inline]
21650#[target_feature(enable = "avx512f,avx512vl")]
21651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21652#[cfg_attr(test, assert_instr(vpermq))]
21653pub unsafe fn _mm256_mask_permutexvar_epi64(
21654    src: __m256i,
21655    k: __mmask8,
21656    idx: __m256i,
21657    a: __m256i,
21658) -> __m256i {
21659    let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
21660    transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
21661}
21662
21663/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21664///
21665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
21666#[inline]
21667#[target_feature(enable = "avx512f,avx512vl")]
21668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21669#[cfg_attr(test, assert_instr(vpermq))]
21670pub unsafe fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
21671    let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
21672    transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
21673}
21674
21675/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
21676///
21677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
21678#[inline]
21679#[target_feature(enable = "avx512f")]
21680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21681#[cfg_attr(test, assert_instr(vpermps))]
21682pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
21683    transmute(vpermps(a.as_f32x16(), idx.as_i32x16()))
21684}
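
// Illustrative sketch (not part of the upstream source): same indexing rule as the epi32
// variant (dst[i] = a[idx[i] & 15]); only the element type differs.
//
//     let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.);
//     let idx = _mm512_set1_epi32(3);
//     let r = _mm512_permutexvar_ps(idx, a);
//     // r == [3.0; 16] (every lane broadcasts element 3 of `a`)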
21685
21686/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21687///
21688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
21689#[inline]
21690#[target_feature(enable = "avx512f")]
21691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21692#[cfg_attr(test, assert_instr(vpermps))]
21693pub unsafe fn _mm512_mask_permutexvar_ps(
21694    src: __m512,
21695    k: __mmask16,
21696    idx: __m512i,
21697    a: __m512,
21698) -> __m512 {
21699    let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
21700    transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
21701}
21702
21703/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21704///
21705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
21706#[inline]
21707#[target_feature(enable = "avx512f")]
21708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21709#[cfg_attr(test, assert_instr(vpermps))]
21710pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
21711    let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
21712    transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
21713}
21714
21715/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
21716///
21717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
21718#[inline]
21719#[target_feature(enable = "avx512f,avx512vl")]
21720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21721#[cfg_attr(test, assert_instr(vpermps))]
21722pub unsafe fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
21723    _mm256_permutevar8x32_ps(a, idx) // LLVM lowers this to llvm.x86.avx2.permps
21724}
21725
21726/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21727///
21728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
21729#[inline]
21730#[target_feature(enable = "avx512f,avx512vl")]
21731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21732#[cfg_attr(test, assert_instr(vpermps))]
21733pub unsafe fn _mm256_mask_permutexvar_ps(
21734    src: __m256,
21735    k: __mmask8,
21736    idx: __m256i,
21737    a: __m256,
21738) -> __m256 {
21739    let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
21740    transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
21741}
21742
21743/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21744///
21745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
21746#[inline]
21747#[target_feature(enable = "avx512f,avx512vl")]
21748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21749#[cfg_attr(test, assert_instr(vpermps))]
21750pub unsafe fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
21751    let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
21752    transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
21753}
21754
21755/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
21756///
21757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
21758#[inline]
21759#[target_feature(enable = "avx512f")]
21760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21761#[cfg_attr(test, assert_instr(vpermpd))]
21762pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
21763    transmute(vpermpd(a.as_f64x8(), idx.as_i64x8()))
21764}
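
// Illustrative sketch (not part of the upstream source): dst[i] = a[idx[i] & 7], with the
// index taken from the corresponding 64-bit element of `idx`.
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let idx = _mm512_setr_epi64(1, 0, 3, 2, 5, 4, 7, 6);
//     let r = _mm512_permutexvar_pd(idx, a);
//     // r == [1., 0., 3., 2., 5., 4., 7., 6.] (adjacent pairs swapped)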
21765
21766/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
21769#[inline]
21770#[target_feature(enable = "avx512f")]
21771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21772#[cfg_attr(test, assert_instr(vpermpd))]
21773pub unsafe fn _mm512_mask_permutexvar_pd(
21774    src: __m512d,
21775    k: __mmask8,
21776    idx: __m512i,
21777    a: __m512d,
21778) -> __m512d {
21779    let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
21780    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
21781}
21782
21783/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21784///
21785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
21786#[inline]
21787#[target_feature(enable = "avx512f")]
21788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21789#[cfg_attr(test, assert_instr(vpermpd))]
21790pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
21791    let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
21792    transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
21793}
21794
21795/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
21796///
21797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
21798#[inline]
21799#[target_feature(enable = "avx512f,avx512vl")]
21800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21801#[cfg_attr(test, assert_instr(vpermpd))]
21802pub unsafe fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
21803    transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4()))
21804}
21805
21806/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21807///
21808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
21809#[inline]
21810#[target_feature(enable = "avx512f,avx512vl")]
21811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21812#[cfg_attr(test, assert_instr(vpermpd))]
21813pub unsafe fn _mm256_mask_permutexvar_pd(
21814    src: __m256d,
21815    k: __mmask8,
21816    idx: __m256i,
21817    a: __m256d,
21818) -> __m256d {
21819    let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
21820    transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
21821}
21822
21823/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21824///
21825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
21826#[inline]
21827#[target_feature(enable = "avx512f,avx512vl")]
21828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21829#[cfg_attr(test, assert_instr(vpermpd))]
21830pub unsafe fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
21831    let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
21832    transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
21833}
21834
21835/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
21836///
21837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
21838#[inline]
21839#[target_feature(enable = "avx512f")]
21840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21841#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21842pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
21843    transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16()))
21844}
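
// Illustrative sketch (not part of the upstream source): the low four bits of each index
// select an element and bit 4 selects the source, so index values 0..=15 read from `a`
// and 16..=31 read from `b`.
//
//     let a = _mm512_set1_epi32(100);
//     let b = _mm512_set1_epi32(200);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_permutex2var_epi32(a, idx, b);
//     // r alternates 100, 200, 100, 200, ...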
21845
21846/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
21847///
21848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
21849#[inline]
21850#[target_feature(enable = "avx512f")]
21851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21852#[cfg_attr(test, assert_instr(vpermt2d))]
21853pub unsafe fn _mm512_mask_permutex2var_epi32(
21854    a: __m512i,
21855    k: __mmask16,
21856    idx: __m512i,
21857    b: __m512i,
21858) -> __m512i {
21859    let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
21860    transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
21861}
21862
21863/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21864///
21865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
21866#[inline]
21867#[target_feature(enable = "avx512f")]
21868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21869#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21870pub unsafe fn _mm512_maskz_permutex2var_epi32(
21871    k: __mmask16,
21872    a: __m512i,
21873    idx: __m512i,
21874    b: __m512i,
21875) -> __m512i {
21876    let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
21877    transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
21878}
21879
21880/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
21881///
21882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
21883#[inline]
21884#[target_feature(enable = "avx512f")]
21885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21886#[cfg_attr(test, assert_instr(vpermi2d))]
21887pub unsafe fn _mm512_mask2_permutex2var_epi32(
21888    a: __m512i,
21889    idx: __m512i,
21890    k: __mmask16,
21891    b: __m512i,
21892) -> __m512i {
21893    let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
21894    transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
21895}
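
// Illustrative sketch (not part of the upstream source): in the `mask2` form the fallback
// source is `idx` itself, i.e. lanes whose mask bit is 0 keep their index value rather
// than an element of `a`. With `a`, `idx`, and `b` as in the sketch above:
//
//     let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
//     // with an all-zero mask, r == idx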
21896
21897/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
21898///
21899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
21900#[inline]
21901#[target_feature(enable = "avx512f,avx512vl")]
21902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21903#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21904pub unsafe fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
21905    transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8()))
21906}
21907
21908/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
21909///
21910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
21911#[inline]
21912#[target_feature(enable = "avx512f,avx512vl")]
21913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21914#[cfg_attr(test, assert_instr(vpermt2d))]
21915pub unsafe fn _mm256_mask_permutex2var_epi32(
21916    a: __m256i,
21917    k: __mmask8,
21918    idx: __m256i,
21919    b: __m256i,
21920) -> __m256i {
21921    let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
21922    transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
21923}
21924
21925/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21926///
21927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
21928#[inline]
21929#[target_feature(enable = "avx512f,avx512vl")]
21930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21931#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21932pub unsafe fn _mm256_maskz_permutex2var_epi32(
21933    k: __mmask8,
21934    a: __m256i,
21935    idx: __m256i,
21936    b: __m256i,
21937) -> __m256i {
21938    let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
21939    transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
21940}
21941
21942/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
21943///
21944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
21945#[inline]
21946#[target_feature(enable = "avx512f,avx512vl")]
21947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21948#[cfg_attr(test, assert_instr(vpermi2d))]
21949pub unsafe fn _mm256_mask2_permutex2var_epi32(
21950    a: __m256i,
21951    idx: __m256i,
21952    k: __mmask8,
21953    b: __m256i,
21954) -> __m256i {
21955    let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
21956    transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
21957}
21958
21959/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
21960///
21961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
21962#[inline]
21963#[target_feature(enable = "avx512f,avx512vl")]
21964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21965#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21966pub unsafe fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
21967    transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4()))
21968}
21969
21970/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
21971///
21972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
21973#[inline]
21974#[target_feature(enable = "avx512f,avx512vl")]
21975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21976#[cfg_attr(test, assert_instr(vpermt2d))]
21977pub unsafe fn _mm_mask_permutex2var_epi32(
21978    a: __m128i,
21979    k: __mmask8,
21980    idx: __m128i,
21981    b: __m128i,
21982) -> __m128i {
21983    let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
21984    transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
21985}
21986
21987/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21988///
21989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
21990#[inline]
21991#[target_feature(enable = "avx512f,avx512vl")]
21992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21993#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
21994pub unsafe fn _mm_maskz_permutex2var_epi32(
21995    k: __mmask8,
21996    a: __m128i,
21997    idx: __m128i,
21998    b: __m128i,
21999) -> __m128i {
22000    let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
22001    transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
22002}
22003
22004/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22005///
22006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
22007#[inline]
22008#[target_feature(enable = "avx512f,avx512vl")]
22009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22010#[cfg_attr(test, assert_instr(vpermi2d))]
22011pub unsafe fn _mm_mask2_permutex2var_epi32(
22012    a: __m128i,
22013    idx: __m128i,
22014    k: __mmask8,
22015    b: __m128i,
22016) -> __m128i {
22017    let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
22018    transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
22019}
22020
22021/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22022///
22023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
22024#[inline]
22025#[target_feature(enable = "avx512f")]
22026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22027#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22028pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
22029    transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8()))
22030}
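
// Illustrative sketch (not part of the upstream source): the low three bits of each index
// select an element and bit 3 selects the source (`a` for 0..=7, `b` for 8..=15).
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
//     let r = _mm512_permutex2var_epi64(a, idx, b);
//     // r == [0, 8, 1, 9, 2, 10, 3, 11]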
22031
22032/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22033///
22034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
22035#[inline]
22036#[target_feature(enable = "avx512f")]
22037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22038#[cfg_attr(test, assert_instr(vpermt2q))]
22039pub unsafe fn _mm512_mask_permutex2var_epi64(
22040    a: __m512i,
22041    k: __mmask8,
22042    idx: __m512i,
22043    b: __m512i,
22044) -> __m512i {
22045    let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
22046    transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
22047}
22048
22049/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22050///
22051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
22052#[inline]
22053#[target_feature(enable = "avx512f")]
22054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22055#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22056pub unsafe fn _mm512_maskz_permutex2var_epi64(
22057    k: __mmask8,
22058    a: __m512i,
22059    idx: __m512i,
22060    b: __m512i,
22061) -> __m512i {
22062    let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
22063    transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22064}
22065
22066/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22067///
22068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
22069#[inline]
22070#[target_feature(enable = "avx512f")]
22071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22072#[cfg_attr(test, assert_instr(vpermi2q))]
22073pub unsafe fn _mm512_mask2_permutex2var_epi64(
22074    a: __m512i,
22075    idx: __m512i,
22076    k: __mmask8,
22077    b: __m512i,
22078) -> __m512i {
22079    let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
22080    transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
22081}
22082
22083/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22084///
22085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
22086#[inline]
22087#[target_feature(enable = "avx512f,avx512vl")]
22088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22089#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22090pub unsafe fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
22091    transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4()))
22092}
22093
22094/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22095///
22096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
22097#[inline]
22098#[target_feature(enable = "avx512f,avx512vl")]
22099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22100#[cfg_attr(test, assert_instr(vpermt2q))]
22101pub unsafe fn _mm256_mask_permutex2var_epi64(
22102    a: __m256i,
22103    k: __mmask8,
22104    idx: __m256i,
22105    b: __m256i,
22106) -> __m256i {
22107    let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
22108    transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
22109}
22110
22111/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22112///
22113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
22114#[inline]
22115#[target_feature(enable = "avx512f,avx512vl")]
22116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22117#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22118pub unsafe fn _mm256_maskz_permutex2var_epi64(
22119    k: __mmask8,
22120    a: __m256i,
22121    idx: __m256i,
22122    b: __m256i,
22123) -> __m256i {
22124    let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
22125    transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
22126}
22127
22128/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22129///
22130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
22131#[inline]
22132#[target_feature(enable = "avx512f,avx512vl")]
22133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22134#[cfg_attr(test, assert_instr(vpermi2q))]
22135pub unsafe fn _mm256_mask2_permutex2var_epi64(
22136    a: __m256i,
22137    idx: __m256i,
22138    k: __mmask8,
22139    b: __m256i,
22140) -> __m256i {
22141    let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
22142    transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
22143}
22144
22145/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22146///
22147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
22148#[inline]
22149#[target_feature(enable = "avx512f,avx512vl")]
22150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22151#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22152pub unsafe fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
22153    transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2()))
22154}
22155
22156/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22157///
22158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
22159#[inline]
22160#[target_feature(enable = "avx512f,avx512vl")]
22161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22162#[cfg_attr(test, assert_instr(vpermt2q))]
22163pub unsafe fn _mm_mask_permutex2var_epi64(
22164    a: __m128i,
22165    k: __mmask8,
22166    idx: __m128i,
22167    b: __m128i,
22168) -> __m128i {
22169    let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
22170    transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
22171}
22172
22173/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22174///
22175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
22176#[inline]
22177#[target_feature(enable = "avx512f,avx512vl")]
22178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22179#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
22180pub unsafe fn _mm_maskz_permutex2var_epi64(
22181    k: __mmask8,
22182    a: __m128i,
22183    idx: __m128i,
22184    b: __m128i,
22185) -> __m128i {
22186    let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
22187    transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
22188}
22189
22190/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22191///
22192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
22193#[inline]
22194#[target_feature(enable = "avx512f,avx512vl")]
22195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22196#[cfg_attr(test, assert_instr(vpermi2q))]
22197pub unsafe fn _mm_mask2_permutex2var_epi64(
22198    a: __m128i,
22199    idx: __m128i,
22200    k: __mmask8,
22201    b: __m128i,
22202) -> __m128i {
22203    let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
22204    transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
22205}
22206
22207/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22208///
22209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
22210#[inline]
22211#[target_feature(enable = "avx512f")]
22212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22213#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22214pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
22215    transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16()))
22216}
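
// Illustrative sketch (not part of the upstream source): identical indexing to the epi32
// variant; `idx` stays an integer vector while `a` and `b` hold the float data.
//
//     let a = _mm512_set1_ps(1.0);
//     let b = _mm512_set1_ps(2.0);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_permutex2var_ps(a, idx, b);
//     // r alternates 1.0, 2.0, 1.0, 2.0, ...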
22217
22218/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22219///
22220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
22221#[inline]
22222#[target_feature(enable = "avx512f")]
22223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22224#[cfg_attr(test, assert_instr(vpermt2ps))]
22225pub unsafe fn _mm512_mask_permutex2var_ps(
22226    a: __m512,
22227    k: __mmask16,
22228    idx: __m512i,
22229    b: __m512,
22230) -> __m512 {
22231    let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
22232    transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
22233}
22234
22235/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22236///
22237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
22238#[inline]
22239#[target_feature(enable = "avx512f")]
22240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22241#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22242pub unsafe fn _mm512_maskz_permutex2var_ps(
22243    k: __mmask16,
22244    a: __m512,
22245    idx: __m512i,
22246    b: __m512,
22247) -> __m512 {
22248    let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
22249    transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22250}
22251
22252/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22253///
22254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
22255#[inline]
22256#[target_feature(enable = "avx512f")]
22257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22258#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps, but LLVM generates vpermt2ps
22259pub unsafe fn _mm512_mask2_permutex2var_ps(
22260    a: __m512,
22261    idx: __m512i,
22262    k: __mmask16,
22263    b: __m512,
22264) -> __m512 {
22265    let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
22266    let idx = _mm512_castsi512_ps(idx).as_f32x16();
22267    transmute(simd_select_bitmask(k, permute, idx))
22268}
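
// Note (illustrative, not from the upstream source): in the float `mask2` form the
// fallback lanes come from `idx` reinterpreted as packed floats (hence the
// `_mm512_castsi512_ps` above), so unselected lanes contain the raw index bits viewed
// as f32 rather than meaningful float values.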
22269
22270/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22271///
22272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
22273#[inline]
22274#[target_feature(enable = "avx512f,avx512vl")]
22275#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22276#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22277pub unsafe fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
22278    transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8()))
22279}
22280
22281/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22282///
22283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
22284#[inline]
22285#[target_feature(enable = "avx512f,avx512vl")]
22286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22287#[cfg_attr(test, assert_instr(vpermt2ps))]
22288pub unsafe fn _mm256_mask_permutex2var_ps(
22289    a: __m256,
22290    k: __mmask8,
22291    idx: __m256i,
22292    b: __m256,
22293) -> __m256 {
22294    let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
22295    transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
22296}
22297
22298/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22299///
22300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
22301#[inline]
22302#[target_feature(enable = "avx512f,avx512vl")]
22303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22304#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22305pub unsafe fn _mm256_maskz_permutex2var_ps(
22306    k: __mmask8,
22307    a: __m256,
22308    idx: __m256i,
22309    b: __m256,
22310) -> __m256 {
22311    let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
22312    transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22313}
22314
22315/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22316///
22317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
22318#[inline]
22319#[target_feature(enable = "avx512f,avx512vl")]
22320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22321#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps, but LLVM generates vpermt2ps
22322pub unsafe fn _mm256_mask2_permutex2var_ps(
22323    a: __m256,
22324    idx: __m256i,
22325    k: __mmask8,
22326    b: __m256,
22327) -> __m256 {
22328    let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
22329    let idx = _mm256_castsi256_ps(idx).as_f32x8();
22330    transmute(simd_select_bitmask(k, permute, idx))
22331}
22332
22333/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22334///
22335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
22336#[inline]
22337#[target_feature(enable = "avx512f,avx512vl")]
22338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22339#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22340pub unsafe fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
22341    transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4()))
22342}
22343
22344/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22345///
22346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
22347#[inline]
22348#[target_feature(enable = "avx512f,avx512vl")]
22349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22350#[cfg_attr(test, assert_instr(vpermt2ps))]
22351pub unsafe fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
22352    let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
22353    transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
22354}
22355
22356/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22357///
22358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
22359#[inline]
22360#[target_feature(enable = "avx512f,avx512vl")]
22361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22362#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
22363pub unsafe fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
22364    let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
22365    transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22366}
22367
22368/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22369///
22370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
22371#[inline]
22372#[target_feature(enable = "avx512f,avx512vl")]
22373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22374#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2ps, but LLVM generates vpermt2ps
22375pub unsafe fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
22376    let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
22377    let idx = _mm_castsi128_ps(idx).as_f32x4();
22378    transmute(simd_select_bitmask(k, permute, idx))
22379}
22380
22381/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22382///
22383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
22384#[inline]
22385#[target_feature(enable = "avx512f")]
22386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22387#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22388pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
22389    transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8()))
22390}
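
// Illustrative sketch (not part of the upstream source): same rule as the epi64 variant;
// bit 3 of each 64-bit index selects between `a` (0..=7) and `b` (8..=15).
//
//     let a = _mm512_set1_pd(1.0);
//     let b = _mm512_set1_pd(2.0);
//     let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
//     let r = _mm512_permutex2var_pd(a, idx, b);
//     // r == [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]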
22391
22392/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22393///
22394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
22395#[inline]
22396#[target_feature(enable = "avx512f")]
22397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22398#[cfg_attr(test, assert_instr(vpermt2pd))]
22399pub unsafe fn _mm512_mask_permutex2var_pd(
22400    a: __m512d,
22401    k: __mmask8,
22402    idx: __m512i,
22403    b: __m512d,
22404) -> __m512d {
22405    let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
22406    transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
22407}
22408
22409/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22410///
22411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
22412#[inline]
22413#[target_feature(enable = "avx512f")]
22414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22415#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22416pub unsafe fn _mm512_maskz_permutex2var_pd(
22417    k: __mmask8,
22418    a: __m512d,
22419    idx: __m512i,
22420    b: __m512d,
22421) -> __m512d {
22422    let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
22423    transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22424}
22425
22426/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22427///
22428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
22429#[inline]
22430#[target_feature(enable = "avx512f")]
22431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22432#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd, but LLVM generates vpermt2pd
22433pub unsafe fn _mm512_mask2_permutex2var_pd(
22434    a: __m512d,
22435    idx: __m512i,
22436    k: __mmask8,
22437    b: __m512d,
22438) -> __m512d {
22439    let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
22440    let idx = _mm512_castsi512_pd(idx).as_f64x8();
22441    transmute(simd_select_bitmask(k, permute, idx))
22442}
22443
22444/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22445///
22446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
22447#[inline]
22448#[target_feature(enable = "avx512f,avx512vl")]
22449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22450#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22451pub unsafe fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
22452    transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4()))
22453}
22454
22455/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22456///
22457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
22458#[inline]
22459#[target_feature(enable = "avx512f,avx512vl")]
22460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22461#[cfg_attr(test, assert_instr(vpermt2pd))]
22462pub unsafe fn _mm256_mask_permutex2var_pd(
22463    a: __m256d,
22464    k: __mmask8,
22465    idx: __m256i,
22466    b: __m256d,
22467) -> __m256d {
22468    let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
22469    transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
22470}
22471
22472/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22473///
22474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
22475#[inline]
22476#[target_feature(enable = "avx512f,avx512vl")]
22477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22478#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22479pub unsafe fn _mm256_maskz_permutex2var_pd(
22480    k: __mmask8,
22481    a: __m256d,
22482    idx: __m256i,
22483    b: __m256d,
22484) -> __m256d {
22485    let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
22486    transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22487}
22488
22489/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22490///
22491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
22492#[inline]
22493#[target_feature(enable = "avx512f,avx512vl")]
22494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22495#[cfg_attr(test, assert_instr(vperm))] // should be vpermi2pd, but LLVM generates vpermt2pd
22496pub unsafe fn _mm256_mask2_permutex2var_pd(
22497    a: __m256d,
22498    idx: __m256i,
22499    k: __mmask8,
22500    b: __m256d,
22501) -> __m256d {
22502    let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
22503    let idx = _mm256_castsi256_pd(idx).as_f64x4();
22504    transmute(simd_select_bitmask(k, permute, idx))
22505}
22506
22507/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
22508///
22509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
22510#[inline]
22511#[target_feature(enable = "avx512f,avx512vl")]
22512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22513#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22514pub unsafe fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
22515    transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2()))
22516}
22517
22518/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
22519///
22520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
22521#[inline]
22522#[target_feature(enable = "avx512f,avx512vl")]
22523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22524#[cfg_attr(test, assert_instr(vpermt2pd))]
22525pub unsafe fn _mm_mask_permutex2var_pd(
22526    a: __m128d,
22527    k: __mmask8,
22528    idx: __m128i,
22529    b: __m128d,
22530) -> __m128d {
22531    let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
22532    transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
22533}
22534
22535/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22536///
22537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
22538#[inline]
22539#[target_feature(enable = "avx512f,avx512vl")]
22540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22541#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
22542pub unsafe fn _mm_maskz_permutex2var_pd(
22543    k: __mmask8,
22544    a: __m128d,
22545    idx: __m128i,
22546    b: __m128d,
22547) -> __m128d {
22548    let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
22549    transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22550}
22551
22552/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
22553///
22554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
22555#[inline]
22556#[target_feature(enable = "avx512f,avx512vl")]
22557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22558#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but the test disassembly shows vpermt2pd
22559pub unsafe fn _mm_mask2_permutex2var_pd(
22560    a: __m128d,
22561    idx: __m128i,
22562    k: __mmask8,
22563    b: __m128d,
22564) -> __m128d {
22565    let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
22566    let idx = _mm_castsi128_pd(idx).as_f64x2();
22567    transmute(simd_select_bitmask(k, permute, idx))
22568}
22569
22570/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
22571///
22572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
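///
/// A minimal illustrative sketch (not run as a doc-test; it assumes the
/// `avx512f` target feature is available and an `unsafe` context). Each 2-bit
/// field of `MASK` selects one of the four 32-bit elements of a 128-bit lane,
/// and the same pattern is applied to every lane:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// // 0b00_01_10_11 reverses the elements of each 128-bit lane.
/// let r = _mm512_shuffle_epi32::<0b00_01_10_11>(a);
/// // r = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]
/// ```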
22573#[inline]
22574#[target_feature(enable = "avx512f")]
22575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22576#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
22577#[rustc_legacy_const_generics(1)]
22578pub unsafe fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
22579    static_assert_uimm_bits!(MASK, 8);
22580    let r: i32x16 = simd_shuffle!(
22581        a.as_i32x16(),
22582        a.as_i32x16(),
22583        [
22584            MASK as u32 & 0b11,
22585            (MASK as u32 >> 2) & 0b11,
22586            (MASK as u32 >> 4) & 0b11,
22587            (MASK as u32 >> 6) & 0b11,
22588            (MASK as u32 & 0b11) + 4,
22589            ((MASK as u32 >> 2) & 0b11) + 4,
22590            ((MASK as u32 >> 4) & 0b11) + 4,
22591            ((MASK as u32 >> 6) & 0b11) + 4,
22592            (MASK as u32 & 0b11) + 8,
22593            ((MASK as u32 >> 2) & 0b11) + 8,
22594            ((MASK as u32 >> 4) & 0b11) + 8,
22595            ((MASK as u32 >> 6) & 0b11) + 8,
22596            (MASK as u32 & 0b11) + 12,
22597            ((MASK as u32 >> 2) & 0b11) + 12,
22598            ((MASK as u32 >> 4) & 0b11) + 12,
22599            ((MASK as u32 >> 6) & 0b11) + 12,
22600        ],
22601    );
22602    transmute(r)
22603}
22604
22605/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22606///
22607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
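///
/// A minimal illustrative sketch of the writemask behaviour (not run as a
/// doc-test; it assumes the `avx512f` target feature and an `unsafe` context):
///
/// ```ignore
/// let src = _mm512_set1_epi32(-1);
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// // Only the four lowest mask bits are set, so elements 4..16 are copied
/// // from `src` instead of taking the shuffled value.
/// let r = _mm512_mask_shuffle_epi32::<0b00_01_10_11>(src, 0b0000_0000_0000_1111, a);
/// // r = [3, 2, 1, 0, -1, -1, ..., -1]
/// ```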
22608#[inline]
22609#[target_feature(enable = "avx512f")]
22610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22611#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22612#[rustc_legacy_const_generics(3)]
22613pub unsafe fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22614    src: __m512i,
22615    k: __mmask16,
22616    a: __m512i,
22617) -> __m512i {
22618    static_assert_uimm_bits!(MASK, 8);
22619    let r = _mm512_shuffle_epi32::<MASK>(a);
22620    transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
22621}
22622
22623/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22624///
22625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
22626#[inline]
22627#[target_feature(enable = "avx512f")]
22628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22629#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22630#[rustc_legacy_const_generics(2)]
22631pub unsafe fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22632    k: __mmask16,
22633    a: __m512i,
22634) -> __m512i {
22635    static_assert_uimm_bits!(MASK, 8);
22636    let r = _mm512_shuffle_epi32::<MASK>(a);
22637    transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
22638}
22639
22640/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22641///
22642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
22643#[inline]
22644#[target_feature(enable = "avx512f,avx512vl")]
22645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22646#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22647#[rustc_legacy_const_generics(3)]
22648pub unsafe fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22649    src: __m256i,
22650    k: __mmask8,
22651    a: __m256i,
22652) -> __m256i {
22653    static_assert_uimm_bits!(MASK, 8);
22654    let r = _mm256_shuffle_epi32::<MASK>(a);
22655    transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
22656}
22657
22658/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22659///
22660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
22661#[inline]
22662#[target_feature(enable = "avx512f,avx512vl")]
22663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22664#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22665#[rustc_legacy_const_generics(2)]
22666pub unsafe fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22667    k: __mmask8,
22668    a: __m256i,
22669) -> __m256i {
22670    static_assert_uimm_bits!(MASK, 8);
22671    let r = _mm256_shuffle_epi32::<MASK>(a);
22672    transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
22673}
22674
22675/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22676///
22677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
22678#[inline]
22679#[target_feature(enable = "avx512f,avx512vl")]
22680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22681#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22682#[rustc_legacy_const_generics(3)]
22683pub unsafe fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22684    src: __m128i,
22685    k: __mmask8,
22686    a: __m128i,
22687) -> __m128i {
22688    static_assert_uimm_bits!(MASK, 8);
22689    let r = _mm_shuffle_epi32::<MASK>(a);
22690    transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
22691}
22692
22693/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22694///
22695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
22696#[inline]
22697#[target_feature(enable = "avx512f,avx512vl")]
22698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22699#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
22700#[rustc_legacy_const_generics(2)]
22701pub unsafe fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
22702    k: __mmask8,
22703    a: __m128i,
22704) -> __m128i {
22705    static_assert_uimm_bits!(MASK, 8);
22706    let r = _mm_shuffle_epi32::<MASK>(a);
22707    transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
22708}
22709
22710/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22711///
22712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
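///
/// A minimal illustrative sketch (not run as a doc-test; it assumes the
/// `avx512f` target feature and an `unsafe` context). In every 128-bit lane
/// the two low result elements come from `a` and the two high result elements
/// come from `b`, each picked by a 2-bit field of `MASK`:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// let r = _mm512_shuffle_ps::<0b00_00_00_00>(a, b);
/// // every 128-bit lane of r is [1.0, 1.0, 2.0, 2.0]
/// ```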
22713#[inline]
22714#[target_feature(enable = "avx512f")]
22715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22716#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22717#[rustc_legacy_const_generics(2)]
22718pub unsafe fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
22719    static_assert_uimm_bits!(MASK, 8);
22720    simd_shuffle!(
22721        a,
22722        b,
22723        [
22724            MASK as u32 & 0b11,
22725            (MASK as u32 >> 2) & 0b11,
22726            ((MASK as u32 >> 4) & 0b11) + 16,
22727            ((MASK as u32 >> 6) & 0b11) + 16,
22728            (MASK as u32 & 0b11) + 4,
22729            ((MASK as u32 >> 2) & 0b11) + 4,
22730            ((MASK as u32 >> 4) & 0b11) + 20,
22731            ((MASK as u32 >> 6) & 0b11) + 20,
22732            (MASK as u32 & 0b11) + 8,
22733            ((MASK as u32 >> 2) & 0b11) + 8,
22734            ((MASK as u32 >> 4) & 0b11) + 24,
22735            ((MASK as u32 >> 6) & 0b11) + 24,
22736            (MASK as u32 & 0b11) + 12,
22737            ((MASK as u32 >> 2) & 0b11) + 12,
22738            ((MASK as u32 >> 4) & 0b11) + 28,
22739            ((MASK as u32 >> 6) & 0b11) + 28,
22740        ],
22741    )
22742}
22743
22744/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22745///
22746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
22747#[inline]
22748#[target_feature(enable = "avx512f")]
22749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22750#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22751#[rustc_legacy_const_generics(4)]
22752pub unsafe fn _mm512_mask_shuffle_ps<const MASK: i32>(
22753    src: __m512,
22754    k: __mmask16,
22755    a: __m512,
22756    b: __m512,
22757) -> __m512 {
22758    static_assert_uimm_bits!(MASK, 8);
22759    let r = _mm512_shuffle_ps::<MASK>(a, b);
22760    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22761}
22762
22763/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22764///
22765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
22766#[inline]
22767#[target_feature(enable = "avx512f")]
22768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22769#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22770#[rustc_legacy_const_generics(3)]
22771pub unsafe fn _mm512_maskz_shuffle_ps<const MASK: i32>(
22772    k: __mmask16,
22773    a: __m512,
22774    b: __m512,
22775) -> __m512 {
22776    static_assert_uimm_bits!(MASK, 8);
22777    let r = _mm512_shuffle_ps::<MASK>(a, b);
22778    transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22779}
22780
22781/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22782///
22783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
22784#[inline]
22785#[target_feature(enable = "avx512f,avx512vl")]
22786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22787#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22788#[rustc_legacy_const_generics(4)]
22789pub unsafe fn _mm256_mask_shuffle_ps<const MASK: i32>(
22790    src: __m256,
22791    k: __mmask8,
22792    a: __m256,
22793    b: __m256,
22794) -> __m256 {
22795    static_assert_uimm_bits!(MASK, 8);
22796    let r = _mm256_shuffle_ps::<MASK>(a, b);
22797    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22798}
22799
22800/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22801///
22802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
22803#[inline]
22804#[target_feature(enable = "avx512f,avx512vl")]
22805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22806#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22807#[rustc_legacy_const_generics(3)]
22808pub unsafe fn _mm256_maskz_shuffle_ps<const MASK: i32>(
22809    k: __mmask8,
22810    a: __m256,
22811    b: __m256,
22812) -> __m256 {
22813    static_assert_uimm_bits!(MASK, 8);
22814    let r = _mm256_shuffle_ps::<MASK>(a, b);
22815    transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22816}
22817
22818/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22819///
22820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
22821#[inline]
22822#[target_feature(enable = "avx512f,avx512vl")]
22823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22824#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22825#[rustc_legacy_const_generics(4)]
22826pub unsafe fn _mm_mask_shuffle_ps<const MASK: i32>(
22827    src: __m128,
22828    k: __mmask8,
22829    a: __m128,
22830    b: __m128,
22831) -> __m128 {
22832    static_assert_uimm_bits!(MASK, 8);
22833    let r = _mm_shuffle_ps::<MASK>(a, b);
22834    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22835}
22836
22837/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22838///
22839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
22840#[inline]
22841#[target_feature(enable = "avx512f,avx512vl")]
22842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22843#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
22844#[rustc_legacy_const_generics(3)]
22845pub unsafe fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
22846    static_assert_uimm_bits!(MASK, 8);
22847    let r = _mm_shuffle_ps::<MASK>(a, b);
22848    transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22849}
22850
22851/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
22852///
22853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
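///
/// A minimal illustrative sketch (not run as a doc-test; it assumes the
/// `avx512f` target feature and an `unsafe` context). Within each 128-bit
/// lane the even result element is taken from `a` and the odd one from `b`;
/// one bit of `MASK` per result element selects the low or high double:
///
/// ```ignore
/// let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
/// // 0b1111_1111 picks the high double of every lane from both inputs.
/// let r = _mm512_shuffle_pd::<0b1111_1111>(a, b);
/// // r = [1.0, 11.0, 3.0, 13.0, 5.0, 15.0, 7.0, 17.0]
/// ```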
22854#[inline]
22855#[target_feature(enable = "avx512f")]
22856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22857#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22858#[rustc_legacy_const_generics(2)]
22859pub unsafe fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
22860    static_assert_uimm_bits!(MASK, 8);
22861    simd_shuffle!(
22862        a,
22863        b,
22864        [
22865            MASK as u32 & 0b1,
22866            ((MASK as u32 >> 1) & 0b1) + 8,
22867            ((MASK as u32 >> 2) & 0b1) + 2,
22868            ((MASK as u32 >> 3) & 0b1) + 10,
22869            ((MASK as u32 >> 4) & 0b1) + 4,
22870            ((MASK as u32 >> 5) & 0b1) + 12,
22871            ((MASK as u32 >> 6) & 0b1) + 6,
22872            ((MASK as u32 >> 7) & 0b1) + 14,
22873        ],
22874    )
22875}
22876
22877/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22878///
22879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
22880#[inline]
22881#[target_feature(enable = "avx512f")]
22882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22883#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22884#[rustc_legacy_const_generics(4)]
22885pub unsafe fn _mm512_mask_shuffle_pd<const MASK: i32>(
22886    src: __m512d,
22887    k: __mmask8,
22888    a: __m512d,
22889    b: __m512d,
22890) -> __m512d {
22891    static_assert_uimm_bits!(MASK, 8);
22892    let r = _mm512_shuffle_pd::<MASK>(a, b);
22893    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22894}
22895
22896/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22897///
22898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
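///
/// A minimal illustrative sketch of the zeromask behaviour (not run as a
/// doc-test; it assumes the `avx512f` target feature and an `unsafe` context):
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(2.0);
/// // Only the two lowest mask bits are set, so elements 2..8 are zeroed.
/// let r = _mm512_maskz_shuffle_pd::<0b0000_0000>(0b0000_0011, a, b);
/// // r = [1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
/// ```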
22899#[inline]
22900#[target_feature(enable = "avx512f")]
22901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22902#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22903#[rustc_legacy_const_generics(3)]
22904pub unsafe fn _mm512_maskz_shuffle_pd<const MASK: i32>(
22905    k: __mmask8,
22906    a: __m512d,
22907    b: __m512d,
22908) -> __m512d {
22909    static_assert_uimm_bits!(MASK, 8);
22910    let r = _mm512_shuffle_pd::<MASK>(a, b);
22911    transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22912}
22913
22914/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22915///
22916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
22917#[inline]
22918#[target_feature(enable = "avx512f,avx512vl")]
22919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22920#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22921#[rustc_legacy_const_generics(4)]
22922pub unsafe fn _mm256_mask_shuffle_pd<const MASK: i32>(
22923    src: __m256d,
22924    k: __mmask8,
22925    a: __m256d,
22926    b: __m256d,
22927) -> __m256d {
22928    static_assert_uimm_bits!(MASK, 8);
22929    let r = _mm256_shuffle_pd::<MASK>(a, b);
22930    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22931}
22932
22933/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22934///
22935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
22936#[inline]
22937#[target_feature(enable = "avx512f,avx512vl")]
22938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22939#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
22940#[rustc_legacy_const_generics(3)]
22941pub unsafe fn _mm256_maskz_shuffle_pd<const MASK: i32>(
22942    k: __mmask8,
22943    a: __m256d,
22944    b: __m256d,
22945) -> __m256d {
22946    static_assert_uimm_bits!(MASK, 8);
22947    let r = _mm256_shuffle_pd::<MASK>(a, b);
22948    transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22949}
22950
22951/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
22954#[inline]
22955#[target_feature(enable = "avx512f,avx512vl")]
22956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22957#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
22958#[rustc_legacy_const_generics(4)]
22959pub unsafe fn _mm_mask_shuffle_pd<const MASK: i32>(
22960    src: __m128d,
22961    k: __mmask8,
22962    a: __m128d,
22963    b: __m128d,
22964) -> __m128d {
22965    static_assert_uimm_bits!(MASK, 8);
22966    let r = _mm_shuffle_pd::<MASK>(a, b);
22967    transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22968}
22969
22970/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22971///
22972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
22973#[inline]
22974#[target_feature(enable = "avx512f,avx512vl")]
22975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22976#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
22977#[rustc_legacy_const_generics(3)]
22978pub unsafe fn _mm_maskz_shuffle_pd<const MASK: i32>(
22979    k: __mmask8,
22980    a: __m128d,
22981    b: __m128d,
22982) -> __m128d {
22983    static_assert_uimm_bits!(MASK, 8);
22984    let r = _mm_shuffle_pd::<MASK>(a, b);
22985    transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22986}
22987
22988/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
22989///
22990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
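///
/// A minimal illustrative sketch (not run as a doc-test; it assumes the
/// `avx512f` target feature and an `unsafe` context). Each 2-bit field of
/// `MASK` selects a whole 128-bit block: the two low result blocks come from
/// `a`, the two high result blocks from `b`:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(100);
/// // Field 0 = 3 picks a's highest block, field 1 = 0 picks a's lowest
/// // block; fields 2 and 3 both pick b's lowest block.
/// let r = _mm512_shuffle_i32x4::<0b00_00_00_11>(a, b);
/// // r = [12, 13, 14, 15, 0, 1, 2, 3, 100, 100, ..., 100]
/// ```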
22991#[inline]
22992#[target_feature(enable = "avx512f")]
22993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22994#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
22995#[rustc_legacy_const_generics(2)]
22996pub unsafe fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
22997    static_assert_uimm_bits!(MASK, 8);
22998    let a = a.as_i32x16();
22999    let b = b.as_i32x16();
23000    let r: i32x16 = simd_shuffle!(
23001        a,
23002        b,
23003        [
23004            (MASK as u32 & 0b11) * 4 + 0,
23005            (MASK as u32 & 0b11) * 4 + 1,
23006            (MASK as u32 & 0b11) * 4 + 2,
23007            (MASK as u32 & 0b11) * 4 + 3,
23008            ((MASK as u32 >> 2) & 0b11) * 4 + 0,
23009            ((MASK as u32 >> 2) & 0b11) * 4 + 1,
23010            ((MASK as u32 >> 2) & 0b11) * 4 + 2,
23011            ((MASK as u32 >> 2) & 0b11) * 4 + 3,
23012            ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
23013            ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
23014            ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
23015            ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
23016            ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
23017            ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
23018            ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
23019            ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
23020        ],
23021    );
23022    transmute(r)
23023}
23024
23025/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23026///
23027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
23028#[inline]
23029#[target_feature(enable = "avx512f")]
23030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23031#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
23032#[rustc_legacy_const_generics(4)]
23033pub unsafe fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
23034    src: __m512i,
23035    k: __mmask16,
23036    a: __m512i,
23037    b: __m512i,
23038) -> __m512i {
23039    static_assert_uimm_bits!(MASK, 8);
23040    let r = _mm512_shuffle_i32x4::<MASK>(a, b);
23041    transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23042}
23043
23044/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23045///
23046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
23047#[inline]
23048#[target_feature(enable = "avx512f")]
23049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23050#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
23051#[rustc_legacy_const_generics(3)]
23052pub unsafe fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
23053    k: __mmask16,
23054    a: __m512i,
23055    b: __m512i,
23056) -> __m512i {
23057    static_assert_uimm_bits!(MASK, 8);
23058    let r = _mm512_shuffle_i32x4::<MASK>(a, b);
23059    transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23060}
23061
23062/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
23063///
23064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
23065#[inline]
23066#[target_feature(enable = "avx512f,avx512vl")]
23067#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23068#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
23069#[rustc_legacy_const_generics(2)]
23070pub unsafe fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
23071    static_assert_uimm_bits!(MASK, 8);
23072    let a = a.as_i32x8();
23073    let b = b.as_i32x8();
23074    let r: i32x8 = simd_shuffle!(
23075        a,
23076        b,
23077        [
23078            (MASK as u32 & 0b1) * 4 + 0,
23079            (MASK as u32 & 0b1) * 4 + 1,
23080            (MASK as u32 & 0b1) * 4 + 2,
23081            (MASK as u32 & 0b1) * 4 + 3,
23082            ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
23083            ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
23084            ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
23085            ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
23086        ],
23087    );
23088    transmute(r)
23089}
23090
23091/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23092///
23093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
23094#[inline]
23095#[target_feature(enable = "avx512f,avx512vl")]
23096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23097#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
23098#[rustc_legacy_const_generics(4)]
23099pub unsafe fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
23100    src: __m256i,
23101    k: __mmask8,
23102    a: __m256i,
23103    b: __m256i,
23104) -> __m256i {
23105    static_assert_uimm_bits!(MASK, 8);
23106    let r = _mm256_shuffle_i32x4::<MASK>(a, b);
23107    transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23108}
23109
23110/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23111///
23112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
23113#[inline]
23114#[target_feature(enable = "avx512f,avx512vl")]
23115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23116#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
23117#[rustc_legacy_const_generics(3)]
23118pub unsafe fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
23119    k: __mmask8,
23120    a: __m256i,
23121    b: __m256i,
23122) -> __m256i {
23123    static_assert_uimm_bits!(MASK, 8);
23124    let r = _mm256_shuffle_i32x4::<MASK>(a, b);
23125    transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23126}
23127
23128/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
23129///
23130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
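///
/// A minimal illustrative sketch (not run as a doc-test; it assumes the
/// `avx512f` target feature and an `unsafe` context). Selection works per
/// 128-bit block, as for `_mm512_shuffle_i32x4`, but each block holds two
/// 64-bit integers:
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_set1_epi64(-1);
/// // Swap the two low blocks of `a`; the high half takes b's lowest block twice.
/// let r = _mm512_shuffle_i64x2::<0b00_00_00_01>(a, b);
/// // r = [2, 3, 0, 1, -1, -1, -1, -1]
/// ```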
23131#[inline]
23132#[target_feature(enable = "avx512f")]
23133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23134#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
23135#[rustc_legacy_const_generics(2)]
23136pub unsafe fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
23137    static_assert_uimm_bits!(MASK, 8);
23138    let a = a.as_i64x8();
23139    let b = b.as_i64x8();
23140    let r: i64x8 = simd_shuffle!(
23141        a,
23142        b,
23143        [
23144            (MASK as u32 & 0b11) * 2 + 0,
23145            (MASK as u32 & 0b11) * 2 + 1,
23146            ((MASK as u32 >> 2) & 0b11) * 2 + 0,
23147            ((MASK as u32 >> 2) & 0b11) * 2 + 1,
23148            ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
23149            ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
23150            ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
23151            ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
23152        ],
23153    );
23154    transmute(r)
23155}
23156
23157/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23158///
23159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
23160#[inline]
23161#[target_feature(enable = "avx512f")]
23162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23163#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
23164#[rustc_legacy_const_generics(4)]
23165pub unsafe fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
23166    src: __m512i,
23167    k: __mmask8,
23168    a: __m512i,
23169    b: __m512i,
23170) -> __m512i {
23171    static_assert_uimm_bits!(MASK, 8);
23172    let r = _mm512_shuffle_i64x2::<MASK>(a, b);
23173    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
23174}
23175
23176/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23177///
23178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
23179#[inline]
23180#[target_feature(enable = "avx512f")]
23181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23182#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
23183#[rustc_legacy_const_generics(3)]
23184pub unsafe fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
23185    k: __mmask8,
23186    a: __m512i,
23187    b: __m512i,
23188) -> __m512i {
23189    static_assert_uimm_bits!(MASK, 8);
23190    let r = _mm512_shuffle_i64x2::<MASK>(a, b);
23191    transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
23192}
23193
23194/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
23195///
23196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
23197#[inline]
23198#[target_feature(enable = "avx512f,avx512vl")]
23199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23200#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
23201#[rustc_legacy_const_generics(2)]
23202pub unsafe fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
23203    static_assert_uimm_bits!(MASK, 8);
23204    let a = a.as_i64x4();
23205    let b = b.as_i64x4();
23206    let r: i64x4 = simd_shuffle!(
23207        a,
23208        b,
23209        [
23210            (MASK as u32 & 0b1) * 2 + 0,
23211            (MASK as u32 & 0b1) * 2 + 1,
23212            ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
23213            ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
23214        ],
23215    );
23216    transmute(r)
23217}
23218
23219/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23220///
23221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
23222#[inline]
23223#[target_feature(enable = "avx512f,avx512vl")]
23224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23225#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
23226#[rustc_legacy_const_generics(4)]
23227pub unsafe fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
23228    src: __m256i,
23229    k: __mmask8,
23230    a: __m256i,
23231    b: __m256i,
23232) -> __m256i {
23233    static_assert_uimm_bits!(MASK, 8);
23234    let r = _mm256_shuffle_i64x2::<MASK>(a, b);
23235    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
23236}
23237
23238/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23239///
23240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
23241#[inline]
23242#[target_feature(enable = "avx512f,avx512vl")]
23243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23244#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
23245#[rustc_legacy_const_generics(3)]
23246pub unsafe fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
23247    k: __mmask8,
23248    a: __m256i,
23249    b: __m256i,
23250) -> __m256i {
23251    static_assert_uimm_bits!(MASK, 8);
23252    let r = _mm256_shuffle_i64x2::<MASK>(a, b);
23253    transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
23254}
23255
23256/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23257///
23258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
23259#[inline]
23260#[target_feature(enable = "avx512f")]
23261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23262#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but vshuff64x2 is generated
23263#[rustc_legacy_const_generics(2)]
23264pub unsafe fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
23265    static_assert_uimm_bits!(MASK, 8);
23266    let a = a.as_f32x16();
23267    let b = b.as_f32x16();
23268    let r: f32x16 = simd_shuffle!(
23269        a,
23270        b,
23271        [
23272            (MASK as u32 & 0b11) * 4 + 0,
23273            (MASK as u32 & 0b11) * 4 + 1,
23274            (MASK as u32 & 0b11) * 4 + 2,
23275            (MASK as u32 & 0b11) * 4 + 3,
23276            ((MASK as u32 >> 2) & 0b11) * 4 + 0,
23277            ((MASK as u32 >> 2) & 0b11) * 4 + 1,
23278            ((MASK as u32 >> 2) & 0b11) * 4 + 2,
23279            ((MASK as u32 >> 2) & 0b11) * 4 + 3,
23280            ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
23281            ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
23282            ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
23283            ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
23284            ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
23285            ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
23286            ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
23287            ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
23288        ],
23289    );
23290    transmute(r)
23291}
23292
23293/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23294///
23295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
23296#[inline]
23297#[target_feature(enable = "avx512f")]
23298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23299#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
23300#[rustc_legacy_const_generics(4)]
23301pub unsafe fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
23302    src: __m512,
23303    k: __mmask16,
23304    a: __m512,
23305    b: __m512,
23306) -> __m512 {
23307    static_assert_uimm_bits!(MASK, 8);
23308    let r = _mm512_shuffle_f32x4::<MASK>(a, b);
23309    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
23310}
23311
23312/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23313///
23314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
23315#[inline]
23316#[target_feature(enable = "avx512f")]
23317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23318#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
23319#[rustc_legacy_const_generics(3)]
23320pub unsafe fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
23321    k: __mmask16,
23322    a: __m512,
23323    b: __m512,
23324) -> __m512 {
23325    static_assert_uimm_bits!(MASK, 8);
23326    let r = _mm512_shuffle_f32x4::<MASK>(a, b);
23327    transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
23328}
23329
23330/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23331///
23332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
23333#[inline]
23334#[target_feature(enable = "avx512f,avx512vl")]
23335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23336#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
23337#[rustc_legacy_const_generics(2)]
23338pub unsafe fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
23339    static_assert_uimm_bits!(MASK, 8);
23340    let a = a.as_f32x8();
23341    let b = b.as_f32x8();
23342    let r: f32x8 = simd_shuffle!(
23343        a,
23344        b,
23345        [
23346            (MASK as u32 & 0b1) * 4 + 0,
23347            (MASK as u32 & 0b1) * 4 + 1,
23348            (MASK as u32 & 0b1) * 4 + 2,
23349            (MASK as u32 & 0b1) * 4 + 3,
23350            ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
23351            ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
23352            ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
23353            ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
23354        ],
23355    );
23356    transmute(r)
23357}
23358
23359/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23360///
23361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
23362#[inline]
23363#[target_feature(enable = "avx512f,avx512vl")]
23364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23365#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
23366#[rustc_legacy_const_generics(4)]
23367pub unsafe fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
23368    src: __m256,
23369    k: __mmask8,
23370    a: __m256,
23371    b: __m256,
23372) -> __m256 {
23373    static_assert_uimm_bits!(MASK, 8);
23374    let r = _mm256_shuffle_f32x4::<MASK>(a, b);
23375    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
23376}
23377
23378/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23379///
23380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
23381#[inline]
23382#[target_feature(enable = "avx512f,avx512vl")]
23383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23384#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
23385#[rustc_legacy_const_generics(3)]
23386pub unsafe fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
23387    k: __mmask8,
23388    a: __m256,
23389    b: __m256,
23390) -> __m256 {
23391    static_assert_uimm_bits!(MASK, 8);
23392    let r = _mm256_shuffle_f32x4::<MASK>(a, b);
23393    transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
23394}
23395
23396/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23397///
23398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
23399#[inline]
23400#[target_feature(enable = "avx512f")]
23401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23402#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
23403#[rustc_legacy_const_generics(2)]
23404pub unsafe fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
23405    static_assert_uimm_bits!(MASK, 8);
23406    let a = a.as_f64x8();
23407    let b = b.as_f64x8();
23408    let r: f64x8 = simd_shuffle!(
23409        a,
23410        b,
23411        [
23412            (MASK as u32 & 0b11) * 2 + 0,
23413            (MASK as u32 & 0b11) * 2 + 1,
23414            ((MASK as u32 >> 2) & 0b11) * 2 + 0,
23415            ((MASK as u32 >> 2) & 0b11) * 2 + 1,
23416            ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
23417            ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
23418            ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
23419            ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
23420        ],
23421    );
23422    transmute(r)
23423}
23424
23425/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23426///
23427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
23428#[inline]
23429#[target_feature(enable = "avx512f")]
23430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23431#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
23432#[rustc_legacy_const_generics(4)]
23433pub unsafe fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
23434    src: __m512d,
23435    k: __mmask8,
23436    a: __m512d,
23437    b: __m512d,
23438) -> __m512d {
23439    static_assert_uimm_bits!(MASK, 8);
23440    let r = _mm512_shuffle_f64x2::<MASK>(a, b);
23441    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
23442}
23443
23444/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23445///
23446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
23447#[inline]
23448#[target_feature(enable = "avx512f")]
23449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23450#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
23451#[rustc_legacy_const_generics(3)]
23452pub unsafe fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
23453    k: __mmask8,
23454    a: __m512d,
23455    b: __m512d,
23456) -> __m512d {
23457    static_assert_uimm_bits!(MASK, 8);
23458    let r = _mm512_shuffle_f64x2::<MASK>(a, b);
23459    transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
23460}
23461
23462/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
23463///
23464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
23465#[inline]
23466#[target_feature(enable = "avx512f,avx512vl")]
23467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23468#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
23469#[rustc_legacy_const_generics(2)]
23470pub unsafe fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
23471    static_assert_uimm_bits!(MASK, 8);
23472    let a = a.as_f64x4();
23473    let b = b.as_f64x4();
23474    let r: f64x4 = simd_shuffle!(
23475        a,
23476        b,
23477        [
23478            (MASK as u32 & 0b1) * 2 + 0,
23479            (MASK as u32 & 0b1) * 2 + 1,
23480            ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
23481            ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
23482        ],
23483    );
23484    transmute(r)
23485}
23486
23487/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23488///
23489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
23490#[inline]
23491#[target_feature(enable = "avx512f,avx512vl")]
23492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23493#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
23494#[rustc_legacy_const_generics(4)]
23495pub unsafe fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
23496    src: __m256d,
23497    k: __mmask8,
23498    a: __m256d,
23499    b: __m256d,
23500) -> __m256d {
23501    static_assert_uimm_bits!(MASK, 8);
23502    let r = _mm256_shuffle_f64x2::<MASK>(a, b);
23503    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23504}
23505
23506/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23507///
23508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
23509#[inline]
23510#[target_feature(enable = "avx512f,avx512vl")]
23511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23512#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
23513#[rustc_legacy_const_generics(3)]
23514pub unsafe fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
23515    k: __mmask8,
23516    a: __m256d,
23517    b: __m256d,
23518) -> __m256d {
23519    static_assert_uimm_bits!(MASK, 8);
23520    let r = _mm256_shuffle_f64x2::<MASK>(a, b);
23521    transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
23522}
23523
23524/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
23525///
23526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
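///
/// A minimal illustrative sketch (not run as a doc-test; it assumes the
/// `avx512f` target feature and an `unsafe` context). `IMM8` selects which of
/// the four 128-bit chunks of `a` becomes the result:
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///     8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
/// );
/// let r = _mm512_extractf32x4_ps::<3>(a); // r = [12.0, 13.0, 14.0, 15.0]
/// ```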
23527#[inline]
23528#[target_feature(enable = "avx512f")]
23529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23530#[cfg_attr(
23531    all(test, not(target_env = "msvc")),
23532    assert_instr(vextractf32x4, IMM8 = 3)
23533)]
23534#[rustc_legacy_const_generics(1)]
23535pub unsafe fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
23536    static_assert_uimm_bits!(IMM8, 2);
23537    match IMM8 & 0x3 {
23538        0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
23539        1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
23540        2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
23541        _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
23542    }
23543}
23544
23545/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23546///
23547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
23548#[inline]
23549#[target_feature(enable = "avx512f")]
23550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23551#[cfg_attr(
23552    all(test, not(target_env = "msvc")),
23553    assert_instr(vextractf32x4, IMM8 = 3)
23554)]
23555#[rustc_legacy_const_generics(3)]
23556pub unsafe fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
23557    src: __m128,
23558    k: __mmask8,
23559    a: __m512,
23560) -> __m128 {
23561    static_assert_uimm_bits!(IMM8, 2);
23562    let r = _mm512_extractf32x4_ps::<IMM8>(a);
23563    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
23564}
23565
23566/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23567///
23568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
23569#[inline]
23570#[target_feature(enable = "avx512f")]
23571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23572#[cfg_attr(
23573    all(test, not(target_env = "msvc")),
23574    assert_instr(vextractf32x4, IMM8 = 3)
23575)]
23576#[rustc_legacy_const_generics(2)]
23577pub unsafe fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
23578    static_assert_uimm_bits!(IMM8, 2);
23579    let r = _mm512_extractf32x4_ps::<IMM8>(a);
23580    transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
23581}
23582
23583/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
23584///
23585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
23586#[inline]
23587#[target_feature(enable = "avx512f,avx512vl")]
23588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23589#[cfg_attr(
23590    all(test, not(target_env = "msvc")),
23591    assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
23592)]
23593#[rustc_legacy_const_generics(1)]
23594pub unsafe fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
23595    static_assert_uimm_bits!(IMM8, 1);
23596    match IMM8 & 0x1 {
23597        0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
23598        _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
23599    }
23600}
23601
23602/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23603///
23604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
23605#[inline]
23606#[target_feature(enable = "avx512f,avx512vl")]
23607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23608#[cfg_attr(
23609    all(test, not(target_env = "msvc")),
23610    assert_instr(vextractf32x4, IMM8 = 1)
23611)]
23612#[rustc_legacy_const_generics(3)]
23613pub unsafe fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
23614    src: __m128,
23615    k: __mmask8,
23616    a: __m256,
23617) -> __m128 {
23618    static_assert_uimm_bits!(IMM8, 1);
23619    let r = _mm256_extractf32x4_ps::<IMM8>(a);
23620    transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
23621}
23622
23623/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23624///
23625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
23626#[inline]
23627#[target_feature(enable = "avx512f,avx512vl")]
23628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23629#[cfg_attr(
23630    all(test, not(target_env = "msvc")),
23631    assert_instr(vextractf32x4, IMM8 = 1)
23632)]
23633#[rustc_legacy_const_generics(2)]
23634pub unsafe fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
23635    static_assert_uimm_bits!(IMM8, 1);
23636    let r = _mm256_extractf32x4_ps::<IMM8>(a);
23637    transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
23638}
23639
23640/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
23641///
23642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
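///
/// A minimal illustrative sketch (not run as a doc-test; it assumes the
/// `avx512f` target feature and an `unsafe` context). `IMM1` selects the low
/// (0) or high (1) 256-bit half of `a`:
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let lo = _mm512_extracti64x4_epi64::<0>(a); // lo = [0, 1, 2, 3]
/// let hi = _mm512_extracti64x4_epi64::<1>(a); // hi = [4, 5, 6, 7]
/// ```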
23643#[inline]
23644#[target_feature(enable = "avx512f")]
23645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23646#[cfg_attr(
23647    all(test, not(target_env = "msvc")),
23648    assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
23649)]
23650#[rustc_legacy_const_generics(1)]
23651pub unsafe fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
23652    static_assert_uimm_bits!(IMM1, 1);
23653    match IMM1 {
23654        0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
23655        _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
23656    }
23657}
23658
23659/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23660///
23661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
23662#[inline]
23663#[target_feature(enable = "avx512f")]
23664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23665#[cfg_attr(
23666    all(test, not(target_env = "msvc")),
23667    assert_instr(vextracti64x4, IMM1 = 1)
23668)]
23669#[rustc_legacy_const_generics(3)]
23670pub unsafe fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
23671    src: __m256i,
23672    k: __mmask8,
23673    a: __m512i,
23674) -> __m256i {
23675    static_assert_uimm_bits!(IMM1, 1);
23676    let r = _mm512_extracti64x4_epi64::<IMM1>(a);
23677    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
23678}
23679
23680/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23681///
23682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
23683#[inline]
23684#[target_feature(enable = "avx512f")]
23685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23686#[cfg_attr(
23687    all(test, not(target_env = "msvc")),
23688    assert_instr(vextracti64x4, IMM1 = 1)
23689)]
23690#[rustc_legacy_const_generics(2)]
23691pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
23692    static_assert_uimm_bits!(IMM1, 1);
23693    let r = _mm512_extracti64x4_epi64::<IMM1>(a);
23694    transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
23695}
23696
23697/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
23698///
23699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
23700#[inline]
23701#[target_feature(enable = "avx512f")]
23702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23703#[cfg_attr(
23704    all(test, not(target_env = "msvc")),
23705    assert_instr(vextractf64x4, IMM8 = 1)
23706)]
23707#[rustc_legacy_const_generics(1)]
23708pub unsafe fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
23709    static_assert_uimm_bits!(IMM8, 1);
23710    match IMM8 & 0x1 {
23711        0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
23712        _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
23713    }
23714}
23715
23716/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23717///
23718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
23719#[inline]
23720#[target_feature(enable = "avx512f")]
23721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23722#[cfg_attr(
23723    all(test, not(target_env = "msvc")),
23724    assert_instr(vextractf64x4, IMM8 = 1)
23725)]
23726#[rustc_legacy_const_generics(3)]
23727pub unsafe fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
23728    src: __m256d,
23729    k: __mmask8,
23730    a: __m512d,
23731) -> __m256d {
23732    static_assert_uimm_bits!(IMM8, 1);
23733    let r = _mm512_extractf64x4_pd::<IMM8>(a);
23734    transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
23735}
23736
23737/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23738///
23739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
23740#[inline]
23741#[target_feature(enable = "avx512f")]
23742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23743#[cfg_attr(
23744    all(test, not(target_env = "msvc")),
23745    assert_instr(vextractf64x4, IMM8 = 1)
23746)]
23747#[rustc_legacy_const_generics(2)]
23748pub unsafe fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
23749    static_assert_uimm_bits!(IMM8, 1);
23750    let r = _mm512_extractf64x4_pd::<IMM8>(a);
23751    transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
23752}
23753
23754/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
23755///
23756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
23757#[inline]
23758#[target_feature(enable = "avx512f")]
23759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23760#[cfg_attr(
23761    all(test, not(target_env = "msvc")),
23762    assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
23763)]
23764#[rustc_legacy_const_generics(1)]
23765pub unsafe fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
23766    static_assert_uimm_bits!(IMM2, 2);
23767    let a = a.as_i32x16();
23768    let zero = i32x16::ZERO;
23769    let extract: i32x4 = match IMM2 {
23770        0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
23771        1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
23772        2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
23773        _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
23774    };
23775    transmute(extract)
23776}
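
// Illustrative usage sketch (not part of the upstream source): IMM2 picks one
// of the four 128-bit lanes of a 512-bit integer vector; with IMM2 = 3 the
// result holds elements 12..=15. The `_demo_*` helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_extract_top_lane_epi32(a: __m512i) -> __m128i {
    _mm512_extracti32x4_epi32::<3>(a)
}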
23777
23778/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23779///
23780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
23781#[inline]
23782#[target_feature(enable = "avx512f")]
23783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23784#[cfg_attr(
23785    all(test, not(target_env = "msvc")),
23786    assert_instr(vextracti32x4, IMM2 = 3)
23787)]
23788#[rustc_legacy_const_generics(3)]
23789pub unsafe fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
23790    src: __m128i,
23791    k: __mmask8,
23792    a: __m512i,
23793) -> __m128i {
23794    static_assert_uimm_bits!(IMM2, 2);
23795    let r = _mm512_extracti32x4_epi32::<IMM2>(a);
23796    transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23797}
23798
23799/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23800///
23801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
23802#[inline]
23803#[target_feature(enable = "avx512f")]
23804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23805#[cfg_attr(
23806    all(test, not(target_env = "msvc")),
23807    assert_instr(vextracti32x4, IMM2 = 3)
23808)]
23809#[rustc_legacy_const_generics(2)]
23810pub unsafe fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
23811    static_assert_uimm_bits!(IMM2, 2);
23812    let r = _mm512_extracti32x4_epi32::<IMM2>(a);
23813    transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
23814}
23815
23816/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
23817///
23818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
23819#[inline]
23820#[target_feature(enable = "avx512f,avx512vl")]
23821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23822#[cfg_attr(
23823    all(test, not(target_env = "msvc")),
23824    assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
23825)]
23826#[rustc_legacy_const_generics(1)]
23827pub unsafe fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
23828    static_assert_uimm_bits!(IMM1, 1);
23829    let a = a.as_i32x8();
23830    let zero = i32x8::ZERO;
23831    let extract: i32x4 = match IMM1 {
23832        0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
23833        _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
23834    };
23835    transmute(extract)
23836}
23837
23838/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23839///
23840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
23841#[inline]
23842#[target_feature(enable = "avx512f,avx512vl")]
23843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23844#[cfg_attr(
23845    all(test, not(target_env = "msvc")),
23846    assert_instr(vextracti32x4, IMM1 = 1)
23847)]
23848#[rustc_legacy_const_generics(3)]
23849pub unsafe fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
23850    src: __m128i,
23851    k: __mmask8,
23852    a: __m256i,
23853) -> __m128i {
23854    static_assert_uimm_bits!(IMM1, 1);
23855    let r = _mm256_extracti32x4_epi32::<IMM1>(a);
23856    transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23857}
23858
23859/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23860///
23861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
23862#[inline]
23863#[target_feature(enable = "avx512f,avx512vl")]
23864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23865#[cfg_attr(
23866    all(test, not(target_env = "msvc")),
23867    assert_instr(vextracti32x4, IMM1 = 1)
23868)]
23869#[rustc_legacy_const_generics(2)]
23870pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
23871    static_assert_uimm_bits!(IMM1, 1);
23872    let r = _mm256_extracti32x4_epi32::<IMM1>(a);
23873    transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
23874}
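
// Illustrative usage sketch (not part of the upstream source): the masked
// extract variants perform the full-lane extract first and then blend per
// element, so the writemask form falls back to `src` and the zeromask form to
// zero for every cleared bit in `k`. Helper name and mask value are
// hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_masked_extract(src: __m128i, a: __m512i) -> (__m128i, __m128i) {
    // Keep only the even elements (bits 0 and 2 of the 4-bit mask).
    let k: __mmask8 = 0b0101;
    let merged = _mm512_mask_extracti32x4_epi32::<1>(src, k, a);
    let zeroed = _mm512_maskz_extracti32x4_epi32::<1>(k, a);
    (merged, zeroed)
}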
23875
23876/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
23877///
23878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
23879#[inline]
23880#[target_feature(enable = "avx512f")]
23881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23882#[cfg_attr(test, assert_instr(vmovsldup))]
23883pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
23884    let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
23885    transmute(r)
23886}
23887
23888/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23889///
23890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
23891#[inline]
23892#[target_feature(enable = "avx512f")]
23893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23894#[cfg_attr(test, assert_instr(vmovsldup))]
23895pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
23896    let mov: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
23897    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
23898}
23899
23900/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23901///
23902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
23903#[inline]
23904#[target_feature(enable = "avx512f")]
23905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23906#[cfg_attr(test, assert_instr(vmovsldup))]
23907pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
23908    let mov: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
23909    transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
23910}
23911
23912/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23913///
23914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
23915#[inline]
23916#[target_feature(enable = "avx512f,avx512vl")]
23917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23918#[cfg_attr(test, assert_instr(vmovsldup))]
23919pub unsafe fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
23920    let mov = _mm256_moveldup_ps(a);
23921    transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
23922}
23923
23924/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23925///
23926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
23927#[inline]
23928#[target_feature(enable = "avx512f,avx512vl")]
23929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23930#[cfg_attr(test, assert_instr(vmovsldup))]
23931pub unsafe fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
23932    let mov = _mm256_moveldup_ps(a);
23933    transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
23934}
23935
23936/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23937///
23938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
23939#[inline]
23940#[target_feature(enable = "avx512f,avx512vl")]
23941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23942#[cfg_attr(test, assert_instr(vmovsldup))]
23943pub unsafe fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
23944    let mov = _mm_moveldup_ps(a);
23945    transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
23946}
23947
23948/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23949///
23950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
23951#[inline]
23952#[target_feature(enable = "avx512f,avx512vl")]
23953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23954#[cfg_attr(test, assert_instr(vmovsldup))]
23955pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
23956    let mov = _mm_moveldup_ps(a);
23957    transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
23958}
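
// Illustrative usage sketch (not part of the upstream source): `moveldup`
// copies each even-indexed element into the following odd slot, so
// [a0, a1, a2, a3, ...] becomes [a0, a0, a2, a2, ...]. The helper and mask
// value below are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_moveldup_lower_half(src: __m512, a: __m512) -> __m512 {
    // Only the low eight elements take the duplicated values; the high eight
    // are copied through from `src`.
    _mm512_mask_moveldup_ps(src, 0x00ff, a)
}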
23959
23960/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
23961///
23962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
23963#[inline]
23964#[target_feature(enable = "avx512f")]
23965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23966#[cfg_attr(test, assert_instr(vmovshdup))]
23967pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
23968    let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
23969    transmute(r)
23970}
23971
23972/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23973///
23974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
23975#[inline]
23976#[target_feature(enable = "avx512f")]
23977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23978#[cfg_attr(test, assert_instr(vmovshdup))]
23979pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
23980    let mov: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
23981    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
23982}
23983
23984/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23985///
23986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
23987#[inline]
23988#[target_feature(enable = "avx512f")]
23989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23990#[cfg_attr(test, assert_instr(vmovshdup))]
23991pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
23992    let mov: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
23993    transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
23994}
23995
23996/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23997///
23998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
23999#[inline]
24000#[target_feature(enable = "avx512f,avx512vl")]
24001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24002#[cfg_attr(test, assert_instr(vmovshdup))]
24003pub unsafe fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
24004    let mov = _mm256_movehdup_ps(a);
24005    transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
24006}
24007
24008/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24009///
24010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
24011#[inline]
24012#[target_feature(enable = "avx512f,avx512vl")]
24013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24014#[cfg_attr(test, assert_instr(vmovshdup))]
24015pub unsafe fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
24016    let mov = _mm256_movehdup_ps(a);
24017    transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
24018}
24019
24020/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24021///
24022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
24023#[inline]
24024#[target_feature(enable = "avx512f,avx512vl")]
24025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24026#[cfg_attr(test, assert_instr(vmovshdup))]
24027pub unsafe fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
24028    let mov = _mm_movehdup_ps(a);
24029    transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
24030}
24031
24032/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24033///
24034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
24035#[inline]
24036#[target_feature(enable = "avx512f,avx512vl")]
24037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24038#[cfg_attr(test, assert_instr(vmovshdup))]
24039pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
24040    let mov = _mm_movehdup_ps(a);
24041    transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
24042}
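
// Illustrative usage sketch (not part of the upstream source): `movehdup` is
// the odd-index counterpart of `moveldup`; [a0, a1, a2, a3] becomes
// [a1, a1, a3, a3]. The hypothetical helper below duplicates the odd elements
// and zeroes the upper pair via the zeromask form.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _demo_movehdup_low_pair(a: __m128) -> __m128 {
    // Mask 0b0011 keeps elements 0 and 1; elements 2 and 3 are zeroed.
    _mm_maskz_movehdup_ps(0b0011, a)
}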
24043
24044/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
24045///
24046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
24047#[inline]
24048#[target_feature(enable = "avx512f")]
24049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24050#[cfg_attr(test, assert_instr(vmovddup))]
24051pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
24052    let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
24053    transmute(r)
24054}
24055
24056/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24057///
24058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
24059#[inline]
24060#[target_feature(enable = "avx512f")]
24061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24062#[cfg_attr(test, assert_instr(vmovddup))]
24063pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
24064    let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
24065    transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
24066}
24067
24068/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24069///
24070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
24071#[inline]
24072#[target_feature(enable = "avx512f")]
24073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24074#[cfg_attr(test, assert_instr(vmovddup))]
24075pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
24076    let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
24077    transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
24078}
24079
24080/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24081///
24082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
24083#[inline]
24084#[target_feature(enable = "avx512f,avx512vl")]
24085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24086#[cfg_attr(test, assert_instr(vmovddup))]
24087pub unsafe fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
24088    let mov = _mm256_movedup_pd(a);
24089    transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
24090}
24091
24092/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24093///
24094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
24095#[inline]
24096#[target_feature(enable = "avx512f,avx512vl")]
24097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24098#[cfg_attr(test, assert_instr(vmovddup))]
24099pub unsafe fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
24100    let mov = _mm256_movedup_pd(a);
24101    transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
24102}
24103
24104/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24105///
24106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
24107#[inline]
24108#[target_feature(enable = "avx512f,avx512vl")]
24109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24110#[cfg_attr(test, assert_instr(vmovddup))]
24111pub unsafe fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
24112    let mov = _mm_movedup_pd(a);
24113    transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
24114}
24115
24116/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24117///
24118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
24119#[inline]
24120#[target_feature(enable = "avx512f,avx512vl")]
24121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24122#[cfg_attr(test, assert_instr(vmovddup))]
24123pub unsafe fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
24124    let mov = _mm_movedup_pd(a);
24125    transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
24126}
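
// Illustrative usage sketch (not part of the upstream source): for doubles,
// `movedup` broadcasts each even-indexed element into the adjacent odd slot,
// so [a0 .. a7] becomes [a0, a0, a2, a2, a4, a4, a6, a6]. The helper name is
// hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_movedup(a: __m512d) -> __m512d {
    _mm512_movedup_pd(a)
}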
24127
24128/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
24129///
24130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
24131#[inline]
24132#[target_feature(enable = "avx512f")]
24133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24134#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
24135#[rustc_legacy_const_generics(2)]
24136pub unsafe fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
24137    static_assert_uimm_bits!(IMM8, 2);
24138    let a = a.as_i32x16();
24139    let b = _mm512_castsi128_si512(b).as_i32x16();
24140    let ret: i32x16 = match IMM8 & 0b11 {
24141        0 => simd_shuffle!(
24142            a,
24143            b,
24144            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
24145        ),
24146        1 => simd_shuffle!(
24147            a,
24148            b,
24149            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
24150        ),
24151        2 => simd_shuffle!(
24152            a,
24153            b,
24154            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
24155        ),
24156        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
24157    };
24158    transmute(ret)
24159}
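
// Illustrative usage sketch (not part of the upstream source): overwriting the
// third 128-bit lane (elements 8..=11) of a 512-bit integer vector with `b`
// while leaving the other lanes untouched. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_insert_lane2_epi32(a: __m512i, b: __m128i) -> __m512i {
    _mm512_inserti32x4::<2>(a, b)
}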
24160
24161/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24162///
24163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
24164#[inline]
24165#[target_feature(enable = "avx512f")]
24166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24167#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
24168#[rustc_legacy_const_generics(4)]
24169pub unsafe fn _mm512_mask_inserti32x4<const IMM8: i32>(
24170    src: __m512i,
24171    k: __mmask16,
24172    a: __m512i,
24173    b: __m128i,
24174) -> __m512i {
24175    static_assert_uimm_bits!(IMM8, 2);
24176    let r = _mm512_inserti32x4::<IMM8>(a, b);
24177    transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24178}
24179
24180/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24181///
24182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
24183#[inline]
24184#[target_feature(enable = "avx512f")]
24185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24186#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
24187#[rustc_legacy_const_generics(3)]
24188pub unsafe fn _mm512_maskz_inserti32x4<const IMM8: i32>(
24189    k: __mmask16,
24190    a: __m512i,
24191    b: __m128i,
24192) -> __m512i {
24193    static_assert_uimm_bits!(IMM8, 2);
24194    let r = _mm512_inserti32x4::<IMM8>(a, b);
24195    transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24196}
24197
24198/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
24199///
24200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
24201#[inline]
24202#[target_feature(enable = "avx512f,avx512vl")]
24203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24204#[cfg_attr(
24205    all(test, not(target_env = "msvc")),
24206    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
24207)]
24208#[rustc_legacy_const_generics(2)]
24209pub unsafe fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
24210    static_assert_uimm_bits!(IMM8, 1);
24211    let a = a.as_i32x8();
24212    let b = _mm256_castsi128_si256(b).as_i32x8();
24213    let ret: i32x8 = match IMM8 & 0b1 {
24214        0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24215        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24216    };
24217    transmute(ret)
24218}
24219
24220/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24221///
24222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
24223#[inline]
24224#[target_feature(enable = "avx512f,avx512vl")]
24225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24226#[cfg_attr(
24227    all(test, not(target_env = "msvc")),
24228    assert_instr(vinserti32x4, IMM8 = 1)
24229)]
24230#[rustc_legacy_const_generics(4)]
24231pub unsafe fn _mm256_mask_inserti32x4<const IMM8: i32>(
24232    src: __m256i,
24233    k: __mmask8,
24234    a: __m256i,
24235    b: __m128i,
24236) -> __m256i {
24237    static_assert_uimm_bits!(IMM8, 1);
24238    let r = _mm256_inserti32x4::<IMM8>(a, b);
24239    transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24240}
24241
24242/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24243///
24244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
24245#[inline]
24246#[target_feature(enable = "avx512f,avx512vl")]
24247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24248#[cfg_attr(
24249    all(test, not(target_env = "msvc")),
24250    assert_instr(vinserti32x4, IMM8 = 1)
24251)]
24252#[rustc_legacy_const_generics(3)]
24253pub unsafe fn _mm256_maskz_inserti32x4<const IMM8: i32>(
24254    k: __mmask8,
24255    a: __m256i,
24256    b: __m128i,
24257) -> __m256i {
24258    static_assert_uimm_bits!(IMM8, 1);
24259    let r = _mm256_inserti32x4::<IMM8>(a, b);
24260    transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24261}
24262
24263/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
24264///
24265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
24266#[inline]
24267#[target_feature(enable = "avx512f")]
24268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24269#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
24270#[rustc_legacy_const_generics(2)]
24271pub unsafe fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
24272    static_assert_uimm_bits!(IMM8, 1);
24273    let b = _mm512_castsi256_si512(b);
24274    match IMM8 & 0b1 {
24275        0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24276        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24277    }
24278}
24279
24280/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24281///
24282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
24283#[inline]
24284#[target_feature(enable = "avx512f")]
24285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24286#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
24287#[rustc_legacy_const_generics(4)]
24288pub unsafe fn _mm512_mask_inserti64x4<const IMM8: i32>(
24289    src: __m512i,
24290    k: __mmask8,
24291    a: __m512i,
24292    b: __m256i,
24293) -> __m512i {
24294    static_assert_uimm_bits!(IMM8, 1);
24295    let r = _mm512_inserti64x4::<IMM8>(a, b);
24296    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24297}
24298
24299/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24300///
24301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
24302#[inline]
24303#[target_feature(enable = "avx512f")]
24304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24305#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
24306#[rustc_legacy_const_generics(3)]
24307pub unsafe fn _mm512_maskz_inserti64x4<const IMM8: i32>(
24308    k: __mmask8,
24309    a: __m512i,
24310    b: __m256i,
24311) -> __m512i {
24312    static_assert_uimm_bits!(IMM8, 1);
24313    let r = _mm512_inserti64x4::<IMM8>(a, b);
24314    transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24315}
24316
24317/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
24318///
24319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
24320#[inline]
24321#[target_feature(enable = "avx512f")]
24322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24323#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
24324#[rustc_legacy_const_generics(2)]
24325pub unsafe fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
24326    static_assert_uimm_bits!(IMM8, 2);
24327    let b = _mm512_castps128_ps512(b);
24328    match IMM8 & 0b11 {
24329        0 => simd_shuffle!(
24330            a,
24331            b,
24332            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
24333        ),
24334        1 => simd_shuffle!(
24335            a,
24336            b,
24337            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
24338        ),
24339        2 => simd_shuffle!(
24340            a,
24341            b,
24342            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
24343        ),
24344        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
24345    }
24346}
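
// Illustrative usage sketch (not part of the upstream source): the masked
// insert variants compute the unmasked insert first and then blend
// element-wise with `src` (writemask) or zero (zeromask). Helper name and mask
// value are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_masked_insertf32x4(src: __m512, a: __m512, b: __m128) -> __m512 {
    // With IMM8 = 1 the inserted block occupies elements 4..=7; mask 0x00f0
    // takes exactly those elements from the insert and the rest from `src`.
    _mm512_mask_insertf32x4::<1>(src, 0x00f0, a, b)
}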
24347
24348/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24349///
24350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
24351#[inline]
24352#[target_feature(enable = "avx512f")]
24353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24354#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
24355#[rustc_legacy_const_generics(4)]
24356pub unsafe fn _mm512_mask_insertf32x4<const IMM8: i32>(
24357    src: __m512,
24358    k: __mmask16,
24359    a: __m512,
24360    b: __m128,
24361) -> __m512 {
24362    static_assert_uimm_bits!(IMM8, 2);
24363    let r = _mm512_insertf32x4::<IMM8>(a, b);
24364    transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24365}
24366
24367/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24368///
24369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
24370#[inline]
24371#[target_feature(enable = "avx512f")]
24372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24373#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
24374#[rustc_legacy_const_generics(3)]
24375pub unsafe fn _mm512_maskz_insertf32x4<const IMM8: i32>(
24376    k: __mmask16,
24377    a: __m512,
24378    b: __m128,
24379) -> __m512 {
24380    static_assert_uimm_bits!(IMM8, 2);
24381    let r = _mm512_insertf32x4::<IMM8>(a, b);
24382    transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24383}
24384
24385/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
24386///
24387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
24388#[inline]
24389#[target_feature(enable = "avx512f,avx512vl")]
24390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24391#[cfg_attr(
24392    all(test, not(target_env = "msvc")),
24393    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
24394)]
24395#[rustc_legacy_const_generics(2)]
24396pub unsafe fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
24397    static_assert_uimm_bits!(IMM8, 1);
24398    let b = _mm256_castps128_ps256(b);
24399    match IMM8 & 0b1 {
24400        0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24401        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24402    }
24403}
24404
24405/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24406///
24407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
24408#[inline]
24409#[target_feature(enable = "avx512f,avx512vl")]
24410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24411#[cfg_attr(
24412    all(test, not(target_env = "msvc")),
24413    assert_instr(vinsertf32x4, IMM8 = 1)
24414)]
24415#[rustc_legacy_const_generics(4)]
24416pub unsafe fn _mm256_mask_insertf32x4<const IMM8: i32>(
24417    src: __m256,
24418    k: __mmask8,
24419    a: __m256,
24420    b: __m128,
24421) -> __m256 {
24422    static_assert_uimm_bits!(IMM8, 1);
24423    let r = _mm256_insertf32x4::<IMM8>(a, b);
24424    transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24425}
24426
24427/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24428///
24429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
24430#[inline]
24431#[target_feature(enable = "avx512f,avx512vl")]
24432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24433#[cfg_attr(
24434    all(test, not(target_env = "msvc")),
24435    assert_instr(vinsertf32x4, IMM8 = 1)
24436)]
24437#[rustc_legacy_const_generics(3)]
24438pub unsafe fn _mm256_maskz_insertf32x4<const IMM8: i32>(
24439    k: __mmask8,
24440    a: __m256,
24441    b: __m128,
24442) -> __m256 {
24443    static_assert_uimm_bits!(IMM8, 1);
24444    let r = _mm256_insertf32x4::<IMM8>(a, b);
24445    transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24446}
24447
24448/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
24449///
24450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
24451#[inline]
24452#[target_feature(enable = "avx512f")]
24453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24454#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
24455#[rustc_legacy_const_generics(2)]
24456pub unsafe fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
24457    static_assert_uimm_bits!(IMM8, 1);
24458    let b = _mm512_castpd256_pd512(b);
24459    match IMM8 & 0b1 {
24460        0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
24461        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
24462    }
24463}
24464
24465/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24466///
24467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
24468#[inline]
24469#[target_feature(enable = "avx512f")]
24470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24471#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
24472#[rustc_legacy_const_generics(4)]
24473pub unsafe fn _mm512_mask_insertf64x4<const IMM8: i32>(
24474    src: __m512d,
24475    k: __mmask8,
24476    a: __m512d,
24477    b: __m256d,
24478) -> __m512d {
24479    static_assert_uimm_bits!(IMM8, 1);
24480    let r = _mm512_insertf64x4::<IMM8>(a, b);
24481    transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24482}
24483
24484/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24485///
24486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
24487#[inline]
24488#[target_feature(enable = "avx512f")]
24489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24490#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
24491#[rustc_legacy_const_generics(3)]
24492pub unsafe fn _mm512_maskz_insertf64x4<const IMM8: i32>(
24493    k: __mmask8,
24494    a: __m512d,
24495    b: __m256d,
24496) -> __m512d {
24497    static_assert_uimm_bits!(IMM8, 1);
24498    let r = _mm512_insertf64x4::<IMM8>(a, b);
24499    transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24500}
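
// Illustrative usage sketch (not part of the upstream source): placing a
// 256-bit double vector into the upper half of a 512-bit vector. With IMM8 = 1
// the inserted block occupies elements 4..=7. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_insert_hi_pd(a: __m512d, b: __m256d) -> __m512d {
    _mm512_insertf64x4::<1>(a, b)
}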
24501
24502/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
24503///
24504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
24505#[inline]
24506#[target_feature(enable = "avx512f")]
24507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24508#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
24509pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
24510    let a = a.as_i32x16();
24511    let b = b.as_i32x16();
24512    #[rustfmt::skip]
24513    let r: i32x16 = simd_shuffle!(
24514        a, b,
24515        [ 2, 18, 3, 19,
24516          2 + 4, 18 + 4, 3 + 4, 19 + 4,
24517          2 + 8, 18 + 8, 3 + 8, 19 + 8,
24518          2 + 12, 18 + 12, 3 + 12, 19 + 12],
24519    );
24520    transmute(r)
24521}
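
// Illustrative usage sketch (not part of the upstream source): within every
// 128-bit lane the high two 32-bit elements of `a` and `b` are interleaved, so
// lane 0 of the result is [a2, b2, a3, b3], lane 1 is [a6, b6, a7, b7], and so
// on. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_unpackhi_epi32(a, b)
}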
24522
24523/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24524///
24525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
24526#[inline]
24527#[target_feature(enable = "avx512f")]
24528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24529#[cfg_attr(test, assert_instr(vpunpckhdq))]
24530pub unsafe fn _mm512_mask_unpackhi_epi32(
24531    src: __m512i,
24532    k: __mmask16,
24533    a: __m512i,
24534    b: __m512i,
24535) -> __m512i {
24536    let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
24537    transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
24538}
24539
24540/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24541///
24542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
24543#[inline]
24544#[target_feature(enable = "avx512f")]
24545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24546#[cfg_attr(test, assert_instr(vpunpckhdq))]
24547pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
24548    let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
24549    transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
24550}
24551
24552/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24553///
24554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
24555#[inline]
24556#[target_feature(enable = "avx512f,avx512vl")]
24557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24558#[cfg_attr(test, assert_instr(vpunpckhdq))]
24559pub unsafe fn _mm256_mask_unpackhi_epi32(
24560    src: __m256i,
24561    k: __mmask8,
24562    a: __m256i,
24563    b: __m256i,
24564) -> __m256i {
24565    let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
24566    transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
24567}
24568
24569/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24570///
24571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
24572#[inline]
24573#[target_feature(enable = "avx512f,avx512vl")]
24574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24575#[cfg_attr(test, assert_instr(vpunpckhdq))]
24576pub unsafe fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24577    let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
24578    transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
24579}
24580
24581/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24582///
24583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
24584#[inline]
24585#[target_feature(enable = "avx512f,avx512vl")]
24586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24587#[cfg_attr(test, assert_instr(vpunpckhdq))]
24588pub unsafe fn _mm_mask_unpackhi_epi32(
24589    src: __m128i,
24590    k: __mmask8,
24591    a: __m128i,
24592    b: __m128i,
24593) -> __m128i {
24594    let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
24595    transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
24596}
24597
24598/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24599///
24600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
24601#[inline]
24602#[target_feature(enable = "avx512f,avx512vl")]
24603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24604#[cfg_attr(test, assert_instr(vpunpckhdq))]
24605pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
24606    let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
24607    transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
24608}
24609
24610/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
24611///
24612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
24613#[inline]
24614#[target_feature(enable = "avx512f")]
24615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24616#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
24617pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
24618    simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
24619}
24620
24621/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24622///
24623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
24624#[inline]
24625#[target_feature(enable = "avx512f")]
24626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24627#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24628pub unsafe fn _mm512_mask_unpackhi_epi64(
24629    src: __m512i,
24630    k: __mmask8,
24631    a: __m512i,
24632    b: __m512i,
24633) -> __m512i {
24634    let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
24635    transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
24636}
24637
24638/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24639///
24640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
24641#[inline]
24642#[target_feature(enable = "avx512f")]
24643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24644#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24645pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24646    let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
24647    transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
24648}
24649
24650/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24651///
24652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
24653#[inline]
24654#[target_feature(enable = "avx512f,avx512vl")]
24655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24656#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24657pub unsafe fn _mm256_mask_unpackhi_epi64(
24658    src: __m256i,
24659    k: __mmask8,
24660    a: __m256i,
24661    b: __m256i,
24662) -> __m256i {
24663    let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
24664    transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
24665}
24666
24667/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24668///
24669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
24670#[inline]
24671#[target_feature(enable = "avx512f,avx512vl")]
24672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24673#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24674pub unsafe fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24675    let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
24676    transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
24677}
24678
24679/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24680///
24681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
24682#[inline]
24683#[target_feature(enable = "avx512f,avx512vl")]
24684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24685#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24686pub unsafe fn _mm_mask_unpackhi_epi64(
24687    src: __m128i,
24688    k: __mmask8,
24689    a: __m128i,
24690    b: __m128i,
24691) -> __m128i {
24692    let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
24693    transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
24694}
24695
24696/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24697///
24698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
24699#[inline]
24700#[target_feature(enable = "avx512f,avx512vl")]
24701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24702#[cfg_attr(test, assert_instr(vpunpckhqdq))]
24703pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
24704    let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
24705    transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
24706}
24707
24708/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
24709///
24710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
24711#[inline]
24712#[target_feature(enable = "avx512f")]
24713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24714#[cfg_attr(test, assert_instr(vunpckhps))]
24715pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
24716    #[rustfmt::skip]
24717    simd_shuffle!(
24718        a, b,
24719        [ 2, 18, 3, 19,
24720          2 + 4, 18 + 4, 3 + 4, 19 + 4,
24721          2 + 8, 18 + 8, 3 + 8, 19 + 8,
24722          2 + 12, 18 + 12, 3 + 12, 19 + 12],
24723    )
24724}
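
// Illustrative sketch, not part of the upstream source: the lane-wise element
// pattern produced by the shuffle above. The helper name `unpackhi_ps_example`
// is assumed for demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn unpackhi_ps_example() {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_setr_ps(
        16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31.,
    );
    // Each 128-bit lane yields [a_hi0, b_hi0, a_hi1, b_hi1]; lane 0 of the
    // result is [2.0, 18.0, 3.0, 19.0].
    let r = _mm512_unpackhi_ps(a, b);
    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), r);
    assert_eq!(out[..4], [2.0, 18.0, 3.0, 19.0]);
}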
24725
24726/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24727///
24728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
24729#[inline]
24730#[target_feature(enable = "avx512f")]
24731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24732#[cfg_attr(test, assert_instr(vunpckhps))]
24733pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
24734    let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
24735    transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
24736}
24737
24738/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24739///
24740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
24741#[inline]
24742#[target_feature(enable = "avx512f")]
24743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24744#[cfg_attr(test, assert_instr(vunpckhps))]
24745pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24746    let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
24747    transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
24748}
24749
24750/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24751///
24752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
24753#[inline]
24754#[target_feature(enable = "avx512f,avx512vl")]
24755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24756#[cfg_attr(test, assert_instr(vunpckhps))]
24757pub unsafe fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
24758    let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
24759    transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
24760}
24761
24762/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24763///
24764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
24765#[inline]
24766#[target_feature(enable = "avx512f,avx512vl")]
24767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24768#[cfg_attr(test, assert_instr(vunpckhps))]
24769pub unsafe fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24770    let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
24771    transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
24772}
24773
24774/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24775///
24776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
24777#[inline]
24778#[target_feature(enable = "avx512f,avx512vl")]
24779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24780#[cfg_attr(test, assert_instr(vunpckhps))]
24781pub unsafe fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
24782    let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
24783    transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
24784}
24785
24786/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24787///
24788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
24789#[inline]
24790#[target_feature(enable = "avx512f,avx512vl")]
24791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24792#[cfg_attr(test, assert_instr(vunpckhps))]
24793pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24794    let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
24795    transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
24796}
24797
24798/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
24799///
24800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
24801#[inline]
24802#[target_feature(enable = "avx512f")]
24803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24804#[cfg_attr(test, assert_instr(vunpckhpd))]
24805pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
24806    simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
24807}
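
// Illustrative sketch, not part of the upstream source: the per-lane pattern of
// the shuffle above. The helper name `unpackhi_pd_example` is assumed for
// demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn unpackhi_pd_example() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
    // Each 128-bit lane contributes [a_hi, b_hi], so the full result is
    // [1, 9, 3, 11, 5, 13, 7, 15].
    let r = _mm512_unpackhi_pd(a, b);
    let mut out = [0.0f64; 8];
    _mm512_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out, [1., 9., 3., 11., 5., 13., 7., 15.]);
}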
24808
24809/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24810///
24811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
24812#[inline]
24813#[target_feature(enable = "avx512f")]
24814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24815#[cfg_attr(test, assert_instr(vunpckhpd))]
24816pub unsafe fn _mm512_mask_unpackhi_pd(
24817    src: __m512d,
24818    k: __mmask8,
24819    a: __m512d,
24820    b: __m512d,
24821) -> __m512d {
24822    let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
24823    transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
24824}
24825
24826/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24827///
24828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
24829#[inline]
24830#[target_feature(enable = "avx512f")]
24831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24832#[cfg_attr(test, assert_instr(vunpckhpd))]
24833pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24834    let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
24835    transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
24836}
24837
24838/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24839///
24840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
24841#[inline]
24842#[target_feature(enable = "avx512f,avx512vl")]
24843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24844#[cfg_attr(test, assert_instr(vunpckhpd))]
24845pub unsafe fn _mm256_mask_unpackhi_pd(
24846    src: __m256d,
24847    k: __mmask8,
24848    a: __m256d,
24849    b: __m256d,
24850) -> __m256d {
24851    let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
24852    transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
24853}
24854
24855/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24856///
24857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
24858#[inline]
24859#[target_feature(enable = "avx512f,avx512vl")]
24860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24861#[cfg_attr(test, assert_instr(vunpckhpd))]
24862pub unsafe fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24863    let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
24864    transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
24865}
24866
24867/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24868///
24869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
24870#[inline]
24871#[target_feature(enable = "avx512f,avx512vl")]
24872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24873#[cfg_attr(test, assert_instr(vunpckhpd))]
24874pub unsafe fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24875    let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
24876    transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
24877}
24878
24879/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24880///
24881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
24882#[inline]
24883#[target_feature(enable = "avx512f,avx512vl")]
24884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24885#[cfg_attr(test, assert_instr(vunpckhpd))]
24886pub unsafe fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24887    let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
24888    transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
24889}
24890
24891/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
24892///
24893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
24894#[inline]
24895#[target_feature(enable = "avx512f")]
24896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24897#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
24898pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
24899    let a = a.as_i32x16();
24900    let b = b.as_i32x16();
24901    #[rustfmt::skip]
24902    let r: i32x16 = simd_shuffle!(
24903        a, b,
24904        [ 0, 16, 1, 17,
24905          0 + 4, 16 + 4, 1 + 4, 17 + 4,
24906          0 + 8, 16 + 8, 1 + 8, 17 + 8,
24907          0 + 12, 16 + 12, 1 + 12, 17 + 12],
24908    );
24909    transmute(r)
24910}
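
// Illustrative sketch, not part of the upstream source: the per-lane pattern of
// the shuffle above. The helper name `unpacklo_epi32_example` is assumed for
// demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn unpacklo_epi32_example() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_setr_epi32(
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    );
    // Lane 0 of the result interleaves the low halves of lane 0: [0, 16, 1, 17].
    let r = _mm512_unpacklo_epi32(a, b);
    let mut out = [0i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), r);
    assert_eq!(out[..4], [0, 16, 1, 17]);
}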
24911
24912/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24913///
24914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
24915#[inline]
24916#[target_feature(enable = "avx512f")]
24917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24918#[cfg_attr(test, assert_instr(vpunpckldq))]
24919pub unsafe fn _mm512_mask_unpacklo_epi32(
24920    src: __m512i,
24921    k: __mmask16,
24922    a: __m512i,
24923    b: __m512i,
24924) -> __m512i {
24925    let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
24926    transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
24927}
24928
24929/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24930///
24931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
24932#[inline]
24933#[target_feature(enable = "avx512f")]
24934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24935#[cfg_attr(test, assert_instr(vpunpckldq))]
24936pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
24937    let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
24938    transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
24939}
24940
24941/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24942///
24943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
24944#[inline]
24945#[target_feature(enable = "avx512f,avx512vl")]
24946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24947#[cfg_attr(test, assert_instr(vpunpckldq))]
24948pub unsafe fn _mm256_mask_unpacklo_epi32(
24949    src: __m256i,
24950    k: __mmask8,
24951    a: __m256i,
24952    b: __m256i,
24953) -> __m256i {
24954    let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
24955    transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
24956}
24957
24958/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24959///
24960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
24961#[inline]
24962#[target_feature(enable = "avx512f,avx512vl")]
24963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24964#[cfg_attr(test, assert_instr(vpunpckldq))]
24965pub unsafe fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24966    let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
24967    transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
24968}
24969
24970/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24971///
24972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
24973#[inline]
24974#[target_feature(enable = "avx512f,avx512vl")]
24975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24976#[cfg_attr(test, assert_instr(vpunpckldq))]
24977pub unsafe fn _mm_mask_unpacklo_epi32(
24978    src: __m128i,
24979    k: __mmask8,
24980    a: __m128i,
24981    b: __m128i,
24982) -> __m128i {
24983    let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
24984    transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
24985}
24986
24987/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24988///
24989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
24990#[inline]
24991#[target_feature(enable = "avx512f,avx512vl")]
24992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24993#[cfg_attr(test, assert_instr(vpunpckldq))]
24994pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
24995    let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
24996    transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
24997}
24998
24999/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
25000///
25001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
25002#[inline]
25003#[target_feature(enable = "avx512f")]
25004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25005#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
25006pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
25007    simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
25008}
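
// Illustrative sketch, not part of the upstream source: the per-lane pattern of
// the shuffle above. The helper name `unpacklo_epi64_example` is assumed for
// demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn unpacklo_epi64_example() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
    // Each 128-bit lane contributes [a_lo, b_lo], so the full result is
    // [0, 8, 2, 10, 4, 12, 6, 14].
    let r = _mm512_unpacklo_epi64(a, b);
    let mut out = [0i64; 8];
    _mm512_storeu_epi64(out.as_mut_ptr(), r);
    assert_eq!(out, [0, 8, 2, 10, 4, 12, 6, 14]);
}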
25009
25010/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25011///
25012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
25013#[inline]
25014#[target_feature(enable = "avx512f")]
25015#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25016#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25017pub unsafe fn _mm512_mask_unpacklo_epi64(
25018    src: __m512i,
25019    k: __mmask8,
25020    a: __m512i,
25021    b: __m512i,
25022) -> __m512i {
25023    let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
25024    transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
25025}
25026
25027/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25028///
25029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
25030#[inline]
25031#[target_feature(enable = "avx512f")]
25032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25033#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25034pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
25035    let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
25036    transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
25037}
25038
25039/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25040///
25041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
25042#[inline]
25043#[target_feature(enable = "avx512f,avx512vl")]
25044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25045#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25046pub unsafe fn _mm256_mask_unpacklo_epi64(
25047    src: __m256i,
25048    k: __mmask8,
25049    a: __m256i,
25050    b: __m256i,
25051) -> __m256i {
25052    let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
25053    transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
25054}
25055
25056/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25057///
25058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
25059#[inline]
25060#[target_feature(enable = "avx512f,avx512vl")]
25061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25062#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25063pub unsafe fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25064    let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
25065    transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
25066}
25067
25068/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25069///
25070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
25071#[inline]
25072#[target_feature(enable = "avx512f,avx512vl")]
25073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25074#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25075pub unsafe fn _mm_mask_unpacklo_epi64(
25076    src: __m128i,
25077    k: __mmask8,
25078    a: __m128i,
25079    b: __m128i,
25080) -> __m128i {
25081    let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
25082    transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
25083}
25084
25085/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25086///
25087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
25088#[inline]
25089#[target_feature(enable = "avx512f,avx512vl")]
25090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25091#[cfg_attr(test, assert_instr(vpunpcklqdq))]
25092pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25093    let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
25094    transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
25095}
25096
25097/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
25098///
25099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
25100#[inline]
25101#[target_feature(enable = "avx512f")]
25102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25103#[cfg_attr(test, assert_instr(vunpcklps))]
25104pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
25105    #[rustfmt::skip]
25106    simd_shuffle!(a, b,
25107                   [ 0, 16, 1, 17,
25108                     0 + 4, 16 + 4, 1 + 4, 17 + 4,
25109                     0 + 8, 16 + 8, 1 + 8, 17 + 8,
25110                     0 + 12, 16 + 12, 1 + 12, 17 + 12],
25111    )
25112}
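
// Illustrative sketch, not part of the upstream source: `_mm512_unpacklo_ps`
// and `_mm512_unpackhi_ps` together interleave two vectors lane by lane, a
// common building block for in-register transposes. The helper name
// `interleave_ps_example` is assumed for demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn interleave_ps_example() {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_setr_ps(
        16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31.,
    );
    let lo = _mm512_unpacklo_ps(a, b);
    let hi = _mm512_unpackhi_ps(a, b);
    // Lane 0 of `lo` is [0, 16, 1, 17] and lane 0 of `hi` is [2, 18, 3, 19];
    // together the two results hold every (a[i], b[i]) pair of lane 0.
    let mut out_lo = [0.0f32; 16];
    let mut out_hi = [0.0f32; 16];
    _mm512_storeu_ps(out_lo.as_mut_ptr(), lo);
    _mm512_storeu_ps(out_hi.as_mut_ptr(), hi);
    assert_eq!(out_lo[..4], [0., 16., 1., 17.]);
    assert_eq!(out_hi[..4], [2., 18., 3., 19.]);
}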
25113
25114/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25115///
25116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
25117#[inline]
25118#[target_feature(enable = "avx512f")]
25119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25120#[cfg_attr(test, assert_instr(vunpcklps))]
25121pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
25122    let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
25123    transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
25124}
25125
25126/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25127///
25128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
25129#[inline]
25130#[target_feature(enable = "avx512f")]
25131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25132#[cfg_attr(test, assert_instr(vunpcklps))]
25133pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
25134    let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
25135    transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
25136}
25137
25138/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25139///
25140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
25141#[inline]
25142#[target_feature(enable = "avx512f,avx512vl")]
25143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25144#[cfg_attr(test, assert_instr(vunpcklps))]
25145pub unsafe fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
25146    let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
25147    transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
25148}
25149
25150/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25151///
25152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
25153#[inline]
25154#[target_feature(enable = "avx512f,avx512vl")]
25155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25156#[cfg_attr(test, assert_instr(vunpcklps))]
25157pub unsafe fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
25158    let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
25159    transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
25160}
25161
25162/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25163///
25164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
25165#[inline]
25166#[target_feature(enable = "avx512f,avx512vl")]
25167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25168#[cfg_attr(test, assert_instr(vunpcklps))]
25169pub unsafe fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
25170    let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
25171    transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
25172}
25173
25174/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25175///
25176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
25177#[inline]
25178#[target_feature(enable = "avx512f,avx512vl")]
25179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25180#[cfg_attr(test, assert_instr(vunpcklps))]
25181pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
25182    let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
25183    transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
25184}
25185
25186/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
25187///
25188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
25189#[inline]
25190#[target_feature(enable = "avx512f")]
25191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25192#[cfg_attr(test, assert_instr(vunpcklpd))]
25193pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
25194    simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
25195}
25196
25197/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25198///
25199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
25200#[inline]
25201#[target_feature(enable = "avx512f")]
25202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25203#[cfg_attr(test, assert_instr(vunpcklpd))]
25204pub unsafe fn _mm512_mask_unpacklo_pd(
25205    src: __m512d,
25206    k: __mmask8,
25207    a: __m512d,
25208    b: __m512d,
25209) -> __m512d {
25210    let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
25211    transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
25212}
25213
25214/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25215///
25216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
25217#[inline]
25218#[target_feature(enable = "avx512f")]
25219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25220#[cfg_attr(test, assert_instr(vunpcklpd))]
25221pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
25222    let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
25223    transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
25224}
25225
25226/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25227///
25228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
25229#[inline]
25230#[target_feature(enable = "avx512f,avx512vl")]
25231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25232#[cfg_attr(test, assert_instr(vunpcklpd))]
25233pub unsafe fn _mm256_mask_unpacklo_pd(
25234    src: __m256d,
25235    k: __mmask8,
25236    a: __m256d,
25237    b: __m256d,
25238) -> __m256d {
25239    let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
25240    transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
25241}
25242
25243/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25244///
25245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
25246#[inline]
25247#[target_feature(enable = "avx512f,avx512vl")]
25248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25249#[cfg_attr(test, assert_instr(vunpcklpd))]
25250pub unsafe fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
25251    let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
25252    transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
25253}
25254
25255/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25256///
25257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
25258#[inline]
25259#[target_feature(enable = "avx512f,avx512vl")]
25260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25261#[cfg_attr(test, assert_instr(vunpcklpd))]
25262pub unsafe fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25263    let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
25264    transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
25265}
25266
25267/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25268///
25269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
25270#[inline]
25271#[target_feature(enable = "avx512f,avx512vl")]
25272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25273#[cfg_attr(test, assert_instr(vunpcklpd))]
25274pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25275    let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
25276    transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
25277}
25278
25279/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25280///
25281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
25282#[inline]
25283#[target_feature(enable = "avx512f")]
25284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25285pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
25286    simd_shuffle!(
25287        a,
25288        _mm_undefined_ps(),
25289        [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
25290    )
25291}
25292
25293/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25294///
25295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
25296#[inline]
25297#[target_feature(enable = "avx512f")]
25298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25299pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
25300    simd_shuffle!(
25301        a,
25302        _mm256_undefined_ps(),
25303        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
25304    )
25305}
25306
25307/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25308///
25309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
25310#[inline]
25311#[target_feature(enable = "avx512f")]
25312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25313pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
25314    simd_shuffle!(
25315        a,
25316        _mm_set1_ps(0.),
25317        [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
25318    )
25319}
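
// Illustrative sketch, not part of the upstream source: contrasts the cast and
// zero-extend forms. `_mm512_castps128_ps512` leaves the upper 384 bits
// unspecified, so only the zext result has a guaranteed upper half. The helper
// name `zextps128_example` is assumed for demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn zextps128_example() {
    let a = _mm_setr_ps(1., 2., 3., 4.);
    let z = _mm512_zextps128_ps512(a);
    let mut out = [f32::NAN; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), z);
    // The low 128 bits are preserved and everything above them is exactly zero.
    assert_eq!(out[..4], [1., 2., 3., 4.]);
    assert_eq!(out[4..], [0.0f32; 12]);
}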
25320
25321/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25322///
25323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
25324#[inline]
25325#[target_feature(enable = "avx512f")]
25326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25327pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
25328    simd_shuffle!(
25329        a,
25330        _mm256_set1_ps(0.),
25331        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
25332    )
25333}
25334
25335/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25336///
25337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
25338#[inline]
25339#[target_feature(enable = "avx512f")]
25340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25341pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
25342    simd_shuffle!(a, a, [0, 1, 2, 3])
25343}
25344
25345/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25346///
25347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
25348#[inline]
25349#[target_feature(enable = "avx512f")]
25350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25351pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 {
25352    simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
25353}
25354
25355/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25356///
25357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
25358#[inline]
25359#[target_feature(enable = "avx512f")]
25360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25361pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d {
25362    transmute(a)
25363}
25364
25365/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25366///
25367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
25368#[inline]
25369#[target_feature(enable = "avx512f")]
25370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25371pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i {
25372    transmute(a)
25373}
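
// Illustrative sketch, not part of the upstream source: the `castps`/`castsi`
// intrinsics are pure bit-pattern reinterpretations, so a round trip returns
// the original bits. The helper name `bitcast_roundtrip_example` is assumed for
// demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn bitcast_roundtrip_example() {
    let v = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // __m512i -> __m512 -> __m512i changes only the static type, not the bits.
    let roundtrip = _mm512_castps_si512(_mm512_castsi512_ps(v));
    let mut out = [0i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), roundtrip);
    assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
}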
25374
25375/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25376///
25377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
25378#[inline]
25379#[target_feature(enable = "avx512f")]
25380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25381pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
25382    simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2])
25383}
25384
25385/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25386///
25387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
25388#[inline]
25389#[target_feature(enable = "avx512f")]
25390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25391pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
25392    simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4])
25393}
25394
25395/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25396///
25397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
25398#[inline]
25399#[target_feature(enable = "avx512f")]
25400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25401pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
25402    simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
25403}
25404
25405/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25406///
25407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
25408#[inline]
25409#[target_feature(enable = "avx512f")]
25410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25411pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
25412    simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
25413}
25414
25415/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25416///
25417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
25418#[inline]
25419#[target_feature(enable = "avx512f")]
25420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25421pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
25422    simd_shuffle!(a, a, [0, 1])
25423}
25424
25425/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25426///
25427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
25428#[inline]
25429#[target_feature(enable = "avx512f")]
25430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25431pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
25432    simd_shuffle!(a, a, [0, 1, 2, 3])
25433}
25434
25435/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25436///
25437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
25438#[inline]
25439#[target_feature(enable = "avx512f")]
25440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25441pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 {
25442    transmute(a)
25443}
25444
25445/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25446///
25447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
25448#[inline]
25449#[target_feature(enable = "avx512f")]
25450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25451pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i {
25452    transmute(a)
25453}
25454
25455/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25456///
25457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
25458#[inline]
25459#[target_feature(enable = "avx512f")]
25460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25461pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
25462    simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2])
25463}
25464
25465/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25466///
25467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
25468#[inline]
25469#[target_feature(enable = "avx512f")]
25470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25471pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
25472    simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4])
25473}
25474
25475/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25476///
25477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
25478#[inline]
25479#[target_feature(enable = "avx512f")]
25480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25481pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
25482    simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2])
25483}
25484
25485/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25486///
25487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
25488#[inline]
25489#[target_feature(enable = "avx512f")]
25490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25491pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
25492    simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4])
25493}
25494
25495/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25496///
25497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
25498#[inline]
25499#[target_feature(enable = "avx512f")]
25500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25501pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
25502    simd_shuffle!(a, a, [0, 1])
25503}
25504
25505/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25506///
25507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
25508#[inline]
25509#[target_feature(enable = "avx512f")]
25510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25511pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
25512    simd_shuffle!(a, a, [0, 1, 2, 3])
25513}
25514
25515/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25516///
25517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
25518#[inline]
25519#[target_feature(enable = "avx512f")]
25520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25521pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
25522    transmute(a)
25523}
25524
25525/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
25526///
25527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
25528#[inline]
25529#[target_feature(enable = "avx512f")]
25530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25531pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
25532    transmute(a)
25533}
25534
25535/// Copy the lower 32-bit integer in a to dst.
25536///
25537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
25538#[inline]
25539#[target_feature(enable = "avx512f")]
25540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25541#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(vmovd))]
25542pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
25543    simd_extract!(a.as_i32x16(), 0)
25544}
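
// Illustrative sketch, not part of the upstream source: extracting the lowest
// element of a 512-bit vector as a scalar. The helper name
// `cvtsi512_si32_example` is assumed for demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cvtsi512_si32_example() {
    let v = _mm512_setr_epi32(7, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    // Only element 0 is read; the other fifteen elements are ignored.
    assert_eq!(_mm512_cvtsi512_si32(v), 7);
}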
25545
25546/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
25547///
25548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
25549#[inline]
25550#[target_feature(enable = "avx512f")]
25551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25552pub unsafe fn _mm512_cvtss_f32(a: __m512) -> f32 {
25553    simd_extract!(a, 0)
25554}
25555
25556/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
25557///
25558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
25559#[inline]
25560#[target_feature(enable = "avx512f")]
25561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25562pub unsafe fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
25563    simd_extract!(a, 0)
25564}
25565
25566/// Broadcast the low packed 32-bit integer from a to all elements of dst.
25567///
25568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
25569#[inline]
25570#[target_feature(enable = "avx512f")]
25571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25572#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
25573pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
25574    let a = _mm512_castsi128_si512(a).as_i32x16();
25575    let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
25576    transmute(ret)
25577}
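
// Illustrative sketch, not part of the upstream source: the lowest 32-bit
// element of the 128-bit source is replicated into all sixteen lanes. The
// helper name `broadcastd_epi32_example` is assumed for demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn broadcastd_epi32_example() {
    let a = _mm_setr_epi32(42, 1, 2, 3);
    let r = _mm512_broadcastd_epi32(a);
    let mut out = [0i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), r);
    // Only element 0 of `a` matters; elements 1..3 are ignored.
    assert_eq!(out, [42; 16]);
}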
25578
25579/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25580///
25581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
25582#[inline]
25583#[target_feature(enable = "avx512f")]
25584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25585#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25586pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
25587    let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
25588    transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
25589}
25590
25591/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25592///
25593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
25594#[inline]
25595#[target_feature(enable = "avx512f")]
25596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25597#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25598pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
25599    let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
25600    transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
25601}
25602
25603/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25604///
25605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
25606#[inline]
25607#[target_feature(enable = "avx512f,avx512vl")]
25608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25609#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25610pub unsafe fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
25611    let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
25612    transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
25613}
25614
25615/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25616///
25617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
25618#[inline]
25619#[target_feature(enable = "avx512f,avx512vl")]
25620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25621#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25622pub unsafe fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
25623    let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
25624    transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
25625}
25626
25627/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25628///
25629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
25630#[inline]
25631#[target_feature(enable = "avx512f,avx512vl")]
25632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25633#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25634pub unsafe fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
25635    let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
25636    transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
25637}
25638
25639/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25640///
25641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
25642#[inline]
25643#[target_feature(enable = "avx512f,avx512vl")]
25644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25645#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
25646pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
25647    let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
25648    transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
25649}
25650
25651/// Broadcast the low packed 64-bit integer from a to all elements of dst.
25652///
25653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
25654#[inline]
25655#[target_feature(enable = "avx512f")]
25656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25657#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
25658pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
25659    simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
25660}
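
// Illustrative sketch, not part of the upstream source: the same broadcast for
// 64-bit elements, combined with the writemask form defined just below. The
// helper name `broadcastq_epi64_example` is assumed for demonstration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn broadcastq_epi64_example() {
    let src = _mm512_set1_epi64(-1);
    let a = _mm_set1_epi64x(7);
    // Broadcast 7 everywhere, but keep `src` (-1) wherever the mask bit is clear.
    let r = _mm512_mask_broadcastq_epi64(src, 0b1111_0000, a);
    let mut out = [0i64; 8];
    _mm512_storeu_epi64(out.as_mut_ptr(), r);
    assert_eq!(out, [-1, -1, -1, -1, 7, 7, 7, 7]);
}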
25661
25662/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25663///
25664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
25665#[inline]
25666#[target_feature(enable = "avx512f")]
25667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25668#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25669pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
25670    let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
25671    transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
25672}
25673
25674/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25675///
25676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
25677#[inline]
25678#[target_feature(enable = "avx512f")]
25679#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25680#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25681pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
25682    let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
25683    transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
25684}
25685
25686/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25687///
25688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
25689#[inline]
25690#[target_feature(enable = "avx512f,avx512vl")]
25691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25692#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25693pub unsafe fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
25694    let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
25695    transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
25696}
25697
25698/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25699///
25700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
25701#[inline]
25702#[target_feature(enable = "avx512f,avx512vl")]
25703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25704#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25705pub unsafe fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
25706    let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
25707    transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
25708}
25709
25710/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25711///
25712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
25713#[inline]
25714#[target_feature(enable = "avx512f,avx512vl")]
25715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25716#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25717pub unsafe fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
25718    let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
25719    transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
25720}
25721
25722/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25723///
25724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
25725#[inline]
25726#[target_feature(enable = "avx512f,avx512vl")]
25727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25728#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
25729pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
25730    let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
25731    transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
25732}
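
// --- Illustrative usage sketch (not part of the upstream source) ------------
// Shows the 512-bit masked 64-bit broadcast; the function name is hypothetical
// and `assert_eq_m512i` is assumed from the crate's test support.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_broadcastq_epi64() {
    let a = _mm_set_epi64x(0, 42); // the low 64-bit element (42) is broadcast
    let src = _mm512_set1_epi64(-1);
    // k = 0b0000_1010: only lanes 1 and 3 receive 42, every other lane keeps -1.
    let r = _mm512_mask_broadcastq_epi64(src, 0b0000_1010, a);
    let e = _mm512_set_epi64(-1, -1, -1, -1, 42, -1, 42, -1);
    assert_eq_m512i(r, e);
}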
25733
25734/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
25735///
25736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
25737#[inline]
25738#[target_feature(enable = "avx512f")]
25739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25740#[cfg_attr(test, assert_instr(vbroadcastss))]
25741pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
25742    simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
25743}
25744
25745/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25746///
25747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
25748#[inline]
25749#[target_feature(enable = "avx512f")]
25750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25751#[cfg_attr(test, assert_instr(vbroadcastss))]
25752pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
25753    let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
25754    transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
25755}
25756
25757/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25758///
25759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
25760#[inline]
25761#[target_feature(enable = "avx512f")]
25762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25763#[cfg_attr(test, assert_instr(vbroadcastss))]
25764pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
25765    let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
25766    transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
25767}
25768
25769/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25770///
25771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
25772#[inline]
25773#[target_feature(enable = "avx512f,avx512vl")]
25774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25775#[cfg_attr(test, assert_instr(vbroadcastss))]
25776pub unsafe fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
25777    let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
25778    transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
25779}
25780
25781/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25782///
25783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
25784#[inline]
25785#[target_feature(enable = "avx512f,avx512vl")]
25786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25787#[cfg_attr(test, assert_instr(vbroadcastss))]
25788pub unsafe fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
25789    let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
25790    transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
25791}
25792
25793/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25794///
25795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
25796#[inline]
25797#[target_feature(enable = "avx512f,avx512vl")]
25798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25799#[cfg_attr(test, assert_instr(vbroadcastss))]
25800pub unsafe fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25801    let broadcast = _mm_broadcastss_ps(a).as_f32x4();
25802    transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
25803}
25804
25805/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25806///
25807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
25808#[inline]
25809#[target_feature(enable = "avx512f,avx512vl")]
25810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25811#[cfg_attr(test, assert_instr(vbroadcastss))]
25812pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
25813    let broadcast = _mm_broadcastss_ps(a).as_f32x4();
25814    transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
25815}
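
// --- Illustrative usage sketch (not part of the upstream source) ------------
// Demonstrates the masked single-precision broadcast at 256-bit width; the
// name is hypothetical and `assert_eq_m256` is assumed to be the crate's
// test helper.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_broadcastss_ps() {
    let a = _mm_set_ps(0.0, 0.0, 0.0, 3.5); // the low lane (3.5) is broadcast
    let src = _mm256_set1_ps(1.0);
    // Writemask 0b1111_0000: only the upper four lanes take 3.5.
    let r = _mm256_mask_broadcastss_ps(src, 0b1111_0000, a);
    assert_eq_m256(r, _mm256_set_ps(3.5, 3.5, 3.5, 3.5, 1.0, 1.0, 1.0, 1.0));
    // Zeromask: unselected lanes are zeroed instead.
    let z = _mm256_maskz_broadcastss_ps(0b1111_0000, a);
    assert_eq_m256(z, _mm256_set_ps(3.5, 3.5, 3.5, 3.5, 0.0, 0.0, 0.0, 0.0));
}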
25816
25817/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
25818///
25819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
25820#[inline]
25821#[target_feature(enable = "avx512f")]
25822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25823#[cfg_attr(test, assert_instr(vbroadcastsd))]
25824pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
25825    simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
25826}
25827
25828/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25829///
25830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
25831#[inline]
25832#[target_feature(enable = "avx512f")]
25833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25834#[cfg_attr(test, assert_instr(vbroadcastsd))]
25835pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
25836    let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
25837    transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
25838}
25839
25840/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25841///
25842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
25843#[inline]
25844#[target_feature(enable = "avx512f")]
25845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25846#[cfg_attr(test, assert_instr(vbroadcastsd))]
25847pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
25848    let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
25849    transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
25850}
25851
25852/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25853///
25854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
25855#[inline]
25856#[target_feature(enable = "avx512f,avx512vl")]
25857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25858#[cfg_attr(test, assert_instr(vbroadcastsd))]
25859pub unsafe fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
25860    let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
25861    transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
25862}
25863
25864/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25865///
25866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
25867#[inline]
25868#[target_feature(enable = "avx512f,avx512vl")]
25869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25870#[cfg_attr(test, assert_instr(vbroadcastsd))]
25871pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
25872    let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
25873    transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
25874}
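
// --- Illustrative usage sketch (not part of the upstream source) ------------
// The same pattern for the double-precision broadcast at 512-bit width; the
// function name is hypothetical and the assert helper is assumed.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_broadcastsd_pd() {
    let a = _mm_set_pd(9.0, 2.5); // the low element (2.5) is broadcast
    let src = _mm512_set1_pd(-1.0);
    // Writemask 0b0000_0011: lanes 0 and 1 get 2.5, lanes 2..7 keep -1.0.
    let r = _mm512_mask_broadcastsd_pd(src, 0b0000_0011, a);
    let e = _mm512_set_pd(-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 2.5, 2.5);
    assert_eq_m512d(r, e);
}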
25875
25876/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
25877///
25878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
25879#[inline]
25880#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
25881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25882pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
25883    let a = a.as_i32x4();
25884    let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
25885    transmute(ret)
25886}
25887
25888/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25889///
25890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
25891#[inline]
25892#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
25893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25894pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
25895    let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
25896    transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
25897}
25898
25899/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25900///
25901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
25902#[inline]
25903#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
25904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25905pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
25906    let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
25907    transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
25908}
25909
25910/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
25911///
25912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
25913#[inline]
25914#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
25915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25916pub unsafe fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
25917    let a = a.as_i32x4();
25918    let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
25919    transmute(ret)
25920}
25921
25922/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25923///
25924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
25925#[inline]
25926#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
25927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25928pub unsafe fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
25929    let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
25930    transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
25931}
25932
25933/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25934///
25935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
25936#[inline]
25937#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
25938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25939pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
25940    let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
25941    transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
25942}
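
// --- Illustrative usage sketch (not part of the upstream source) ------------
// Unlike the element broadcasts above, the x4 broadcasts repeat a whole
// 128-bit group. A hypothetical example for the 256-bit form, assuming the
// crate's `assert_eq_m256i` test helper.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_broadcast_i32x4() {
    let a = _mm_set_epi32(4, 3, 2, 1); // elements 1, 2, 3, 4 from low to high
    let r = _mm256_broadcast_i32x4(a);
    // The whole 4-element group appears twice across the 256-bit result.
    assert_eq_m256i(r, _mm256_set_epi32(4, 3, 2, 1, 4, 3, 2, 1));
}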
25943
25944/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
25945///
25946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
25947#[inline]
25948#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
25949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25950pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
25951    simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
25952}
25953
25954/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25955///
25956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
25957#[inline]
25958#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
25959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25960pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
25961    let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
25962    transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
25963}
25964
25965/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25966///
25967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
25968#[inline]
25969#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
25970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25971pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
25972    let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
25973    transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
25974}
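
// --- Illustrative usage sketch (not part of the upstream source) ------------
// A hypothetical example of the 256-bit-group broadcast combined with a
// zeromask; helper names are assumed.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_broadcast_i64x4() {
    let a = _mm256_set_epi64x(4, 3, 2, 1);
    // k = 0b0000_1111: keep the first copy of the group, zero the second.
    let r = _mm512_maskz_broadcast_i64x4(0b0000_1111, a);
    assert_eq_m512i(r, _mm512_set_epi64(0, 0, 0, 0, 4, 3, 2, 1));
}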
25975
25976/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
25977///
25978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
25979#[inline]
25980#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
25981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25982pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
25983    simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
25984}
25985
25986/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25987///
25988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
25989#[inline]
25990#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
25991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25992pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
25993    let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
25994    transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
25995}
25996
25997/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25998///
25999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
26000#[inline]
26001#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
26002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26003pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
26004    let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
26005    transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
26006}
26007
26008/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
26009///
26010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
26011#[inline]
26012#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
26013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26014pub unsafe fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
26015    simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
26016}
26017
26018/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26019///
26020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
26021#[inline]
26022#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
26023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26024pub unsafe fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
26025    let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
26026    transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
26027}
26028
26029/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26030///
26031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
26032#[inline]
26033#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
26034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26035pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
26036    let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
26037    transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
26038}
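
// --- Illustrative usage sketch (not part of the upstream source) ------------
// The floating-point x4 group broadcast with a writemask; names below are
// hypothetical and the assert helper is assumed from the crate's test support.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_broadcast_f32x4() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
    let src = _mm256_set1_ps(9.0);
    // k = 0b0000_1111: the low copy of the group is taken, while the upper
    // four lanes keep the 9.0 values from `src`.
    let r = _mm256_mask_broadcast_f32x4(src, 0b0000_1111, a);
    assert_eq_m256(r, _mm256_set_ps(9.0, 9.0, 9.0, 9.0, 4.0, 3.0, 2.0, 1.0));
}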
26039
26040/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
26041///
26042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
26043#[inline]
26044#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
26045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26046pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
26047    simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
26048}
26049
26050/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26051///
26052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
26053#[inline]
26054#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
26055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26056pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
26057    let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
26058    transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
26059}
26060
26061/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26062///
26063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
26064#[inline]
26065#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
26066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26067pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
26068    let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
26069    transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
26070}
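
// --- Illustrative usage sketch (not part of the upstream source) ------------
// The double-precision 256-bit-group broadcast; a hedged sketch with an
// assumed assert helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_broadcast_f64x4() {
    let a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
    let r = _mm512_broadcast_f64x4(a);
    // The 4-element group appears in both halves of the 512-bit result.
    assert_eq_m512d(r, _mm512_set_pd(4.0, 3.0, 2.0, 1.0, 4.0, 3.0, 2.0, 1.0));
}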
26071
26072/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
26073///
26074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
26075#[inline]
26076#[target_feature(enable = "avx512f")]
26077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26078#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
26079pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26080    transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16()))
26081}
26082
26083/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
26084///
26085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
26086#[inline]
26087#[target_feature(enable = "avx512f,avx512vl")]
26088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26089#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
26090pub unsafe fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26091    transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8()))
26092}
26093
26094/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
26095///
26096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
26097#[inline]
26098#[target_feature(enable = "avx512f,avx512vl")]
26099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26100#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
26101pub unsafe fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26102    transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4()))
26103}
26104
26105/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
26106///
26107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
26108#[inline]
26109#[target_feature(enable = "avx512f")]
26110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26111#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
26112pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26113    transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8()))
26114}
26115
26116/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
26117///
26118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
26119#[inline]
26120#[target_feature(enable = "avx512f,avx512vl")]
26121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26122#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
26123pub unsafe fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26124    transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4()))
26125}
26126
26127/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
26128///
26129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
26130#[inline]
26131#[target_feature(enable = "avx512f,avx512vl")]
26132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26133#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
26134pub unsafe fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26135    transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2()))
26136}
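
// --- Illustrative usage sketch (not part of the upstream source) ------------
// `mask_blend` selects per lane: a set mask bit takes the element from `b`, a
// clear bit takes it from `a`. Function name hypothetical, helper assumed.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_blend_epi32() {
    let a = _mm_set1_epi32(1);
    let b = _mm_set1_epi32(2);
    // k = 0b1100: lanes 2 and 3 come from `b`, lanes 0 and 1 from `a`.
    let r = _mm_mask_blend_epi32(0b1100, a, b);
    assert_eq_m128i(r, _mm_set_epi32(2, 2, 1, 1));
}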
26137
26138/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26139///
26140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
26141#[inline]
26142#[target_feature(enable = "avx512f")]
26143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26144#[cfg_attr(test, assert_instr(vmovaps))] //should be vblendmps
26145pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26146    transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16()))
26147}
26148
26149/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26150///
26151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
26152#[inline]
26153#[target_feature(enable = "avx512f,avx512vl")]
26154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26155#[cfg_attr(test, assert_instr(vmovaps))] //should be vblendmps
26156pub unsafe fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26157    transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8()))
26158}
26159
26160/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26166#[cfg_attr(test, assert_instr(vmovaps))] //should be vblendmps
26167pub unsafe fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26168    transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4()))
26169}
26170
26171/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26172///
26173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
26174#[inline]
26175#[target_feature(enable = "avx512f")]
26176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26177#[cfg_attr(test, assert_instr(vmovapd))] //should be vblendmpd
26178pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26179    transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8()))
26180}
26181
26182/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26183///
26184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
26185#[inline]
26186#[target_feature(enable = "avx512f,avx512vl")]
26187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26188#[cfg_attr(test, assert_instr(vmovapd))] //should be vblendmpd
26189pub unsafe fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26190    transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4()))
26191}
26192
26193/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
26194///
26195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
26196#[inline]
26197#[target_feature(enable = "avx512f,avx512vl")]
26198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26199#[cfg_attr(test, assert_instr(vmovapd))] //should be vblendmpd
26200pub unsafe fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26201    transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2()))
26202}
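
// --- Illustrative usage sketch (not part of the upstream source) ------------
// The floating-point blends follow exactly the same per-lane rule; this
// hypothetical example uses the 256-bit double-precision form.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_blend_pd() {
    let a = _mm256_set1_pd(1.0);
    let b = _mm256_set1_pd(2.0);
    // k = 0b0101: lanes 0 and 2 are taken from `b`, lanes 1 and 3 from `a`.
    let r = _mm256_mask_blend_pd(0b0101, a, b);
    assert_eq_m256d(r, _mm256_set_pd(1.0, 2.0, 1.0, 2.0));
}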
26203
26204/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
26205///
26206/// <div class="warning">Only the lowest <strong>4 bits</strong> of the immediate <code>imm8</code> are used (shift at maximum by 60 bytes)!</div>
26207///
26208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
26209#[inline]
26210#[target_feature(enable = "avx512f")]
26211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26212#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26213#[rustc_legacy_const_generics(2)]
26214pub unsafe fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
26215    static_assert_uimm_bits!(IMM8, 8);
26216    let a = a.as_i32x16();
26217    let b = b.as_i32x16();
26218    let imm8: i32 = IMM8 % 16;
26219    let r: i32x16 = match imm8 {
26220        0 => simd_shuffle!(
26221            a,
26222            b,
26223            [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,],
26224        ),
26225        1 => simd_shuffle!(
26226            a,
26227            b,
26228            [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,],
26229        ),
26230        2 => simd_shuffle!(
26231            a,
26232            b,
26233            [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
26234        ),
26235        3 => simd_shuffle!(
26236            a,
26237            b,
26238            [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
26239        ),
26240        4 => simd_shuffle!(
26241            a,
26242            b,
26243            [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
26244        ),
26245        5 => simd_shuffle!(
26246            a,
26247            b,
26248            [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
26249        ),
26250        6 => simd_shuffle!(
26251            a,
26252            b,
26253            [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
26254        ),
26255        7 => simd_shuffle!(
26256            a,
26257            b,
26258            [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
26259        ),
26260        8 => simd_shuffle!(
26261            a,
26262            b,
26263            [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
26264        ),
26265        9 => simd_shuffle!(
26266            a,
26267            b,
26268            [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
26269        ),
26270        10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
26271        11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
26272        12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
26273        13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
26274        14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
26275        15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
26276        _ => unreachable_unchecked(),
26277    };
26278    transmute(r)
26279}
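
// --- Illustrative usage sketch (not part of the upstream source) ------------
// `alignr_epi32` treats b:a as one long vector: the low result elements come
// from `b` starting at element IMM8, and the tail wraps around into `a`. A
// hypothetical 512-bit example with IMM8 = 1, assuming `assert_eq_m512i`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_alignr_epi32() {
    let a = _mm512_setr_epi32(100, 101, 102, 103, 104, 105, 106, 107,
                              108, 109, 110, 111, 112, 113, 114, 115);
    let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // Shift right by one 32-bit element: the result is b[1..16] followed by a[0].
    let r = _mm512_alignr_epi32::<1>(a, b);
    let e = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100);
    assert_eq_m512i(r, e);
}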
26280
26281/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26282///
26283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
26284#[inline]
26285#[target_feature(enable = "avx512f")]
26286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26287#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26288#[rustc_legacy_const_generics(4)]
26289pub unsafe fn _mm512_mask_alignr_epi32<const IMM8: i32>(
26290    src: __m512i,
26291    k: __mmask16,
26292    a: __m512i,
26293    b: __m512i,
26294) -> __m512i {
26295    static_assert_uimm_bits!(IMM8, 8);
26296    let r = _mm512_alignr_epi32::<IMM8>(a, b);
26297    transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
26298}
26299
26300/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26301///
26302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
26303#[inline]
26304#[target_feature(enable = "avx512f")]
26305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26306#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26307#[rustc_legacy_const_generics(3)]
26308pub unsafe fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
26309    k: __mmask16,
26310    a: __m512i,
26311    b: __m512i,
26312) -> __m512i {
26313    static_assert_uimm_bits!(IMM8, 8);
26314    let r = _mm512_alignr_epi32::<IMM8>(a, b);
26315    transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
26316}
26317
26318/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
26319///
26320/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate <code>imm8</code> are used (shift at maximum by 28 bytes)!</div>
26321///
26322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
26323#[inline]
26324#[target_feature(enable = "avx512f,avx512vl")]
26325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26326#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26327#[rustc_legacy_const_generics(2)]
26328pub unsafe fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
26329    static_assert_uimm_bits!(IMM8, 8);
26330    let a = a.as_i32x8();
26331    let b = b.as_i32x8();
26332    let imm8: i32 = IMM8 % 8;
26333    let r: i32x8 = match imm8 {
26334        0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
26335        1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
26336        2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
26337        3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
26338        4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
26339        5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
26340        6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
26341        7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
26342        _ => unreachable_unchecked(),
26343    };
26344    transmute(r)
26345}
26346
26347/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26348///
26349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
26350#[inline]
26351#[target_feature(enable = "avx512f,avx512vl")]
26352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26353#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26354#[rustc_legacy_const_generics(4)]
26355pub unsafe fn _mm256_mask_alignr_epi32<const IMM8: i32>(
26356    src: __m256i,
26357    k: __mmask8,
26358    a: __m256i,
26359    b: __m256i,
26360) -> __m256i {
26361    static_assert_uimm_bits!(IMM8, 8);
26362    let r = _mm256_alignr_epi32::<IMM8>(a, b);
26363    transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
26364}
26365
26366/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26367///
26368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
26369#[inline]
26370#[target_feature(enable = "avx512f,avx512vl")]
26371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26372#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26373#[rustc_legacy_const_generics(3)]
26374pub unsafe fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
26375    k: __mmask8,
26376    a: __m256i,
26377    b: __m256i,
26378) -> __m256i {
26379    static_assert_uimm_bits!(IMM8, 8);
26380    let r = _mm256_alignr_epi32::<IMM8>(a, b);
26381    transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
26382}
26383
26384/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
26385///
26386/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate <code>imm8</code> are used (shift at maximum by 12 bytes)!</div>
26387///
26388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
26389#[inline]
26390#[target_feature(enable = "avx512f,avx512vl")]
26391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26392#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
26393#[rustc_legacy_const_generics(2)]
26394pub unsafe fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
26395    static_assert_uimm_bits!(IMM8, 8);
26396    let a = a.as_i32x4();
26397    let b = b.as_i32x4();
26398    let imm8: i32 = IMM8 % 4;
26399    let r: i32x4 = match imm8 {
26400        0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
26401        1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
26402        2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
26403        3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
26404        _ => unreachable_unchecked(),
26405    };
26406    transmute(r)
26407}
26408
26409/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26410///
26411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
26412#[inline]
26413#[target_feature(enable = "avx512f,avx512vl")]
26414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26415#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26416#[rustc_legacy_const_generics(4)]
26417pub unsafe fn _mm_mask_alignr_epi32<const IMM8: i32>(
26418    src: __m128i,
26419    k: __mmask8,
26420    a: __m128i,
26421    b: __m128i,
26422) -> __m128i {
26423    static_assert_uimm_bits!(IMM8, 8);
26424    let r = _mm_alignr_epi32::<IMM8>(a, b);
26425    transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
26426}
26427
26428/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26429///
26430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
26431#[inline]
26432#[target_feature(enable = "avx512f,avx512vl")]
26433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26434#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
26435#[rustc_legacy_const_generics(3)]
26436pub unsafe fn _mm_maskz_alignr_epi32<const IMM8: i32>(
26437    k: __mmask8,
26438    a: __m128i,
26439    b: __m128i,
26440) -> __m128i {
26441    static_assert_uimm_bits!(IMM8, 8);
26442    let r = _mm_alignr_epi32::<IMM8>(a, b);
26443    transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
26444}
26445
26446/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
26447///
26448/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate <code>imm8</code> are used (shift at maximum by 56 bytes)!</div>
26449///
26450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
26451#[inline]
26452#[target_feature(enable = "avx512f")]
26453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26454#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26455#[rustc_legacy_const_generics(2)]
26456pub unsafe fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
26457    static_assert_uimm_bits!(IMM8, 8);
26458    let imm8: i32 = IMM8 % 8;
26459    let r: i64x8 = match imm8 {
26460        0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
26461        1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
26462        2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
26463        3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
26464        4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
26465        5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
26466        6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
26467        7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
26468        _ => unreachable_unchecked(),
26469    };
26470    transmute(r)
26471}
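
// --- Illustrative usage sketch (not part of the upstream source) ------------
// The 64-bit variant works the same way, just over 8 lanes; a hypothetical
// example with IMM8 = 3, assuming the crate's `assert_eq_m512i` helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_alignr_epi64() {
    let a = _mm512_set_epi64(17, 16, 15, 14, 13, 12, 11, 10);
    let b = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    // Shift right by three 64-bit elements: b[3..8] followed by a[0..3].
    let r = _mm512_alignr_epi64::<3>(a, b);
    let e = _mm512_set_epi64(12, 11, 10, 7, 6, 5, 4, 3);
    assert_eq_m512i(r, e);
}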
26472
26473/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26474///
26475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
26476#[inline]
26477#[target_feature(enable = "avx512f")]
26478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26479#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26480#[rustc_legacy_const_generics(4)]
26481pub unsafe fn _mm512_mask_alignr_epi64<const IMM8: i32>(
26482    src: __m512i,
26483    k: __mmask8,
26484    a: __m512i,
26485    b: __m512i,
26486) -> __m512i {
26487    static_assert_uimm_bits!(IMM8, 8);
26488    let r = _mm512_alignr_epi64::<IMM8>(a, b);
26489    transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
26490}
26491
26492/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26493///
26494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
26495#[inline]
26496#[target_feature(enable = "avx512f")]
26497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26498#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26499#[rustc_legacy_const_generics(3)]
26500pub unsafe fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
26501    k: __mmask8,
26502    a: __m512i,
26503    b: __m512i,
26504) -> __m512i {
26505    static_assert_uimm_bits!(IMM8, 8);
26506    let r = _mm512_alignr_epi64::<IMM8>(a, b);
26507    transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
26508}
26509
26510/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
26511///
26512/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate <code>imm8</code> are used (shift at maximum by 24 bytes)!</div>
26513///
26514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
26515#[inline]
26516#[target_feature(enable = "avx512f,avx512vl")]
26517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26518#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26519#[rustc_legacy_const_generics(2)]
26520pub unsafe fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
26521    static_assert_uimm_bits!(IMM8, 8);
26522    let imm8: i32 = IMM8 % 4;
26523    let r: i64x4 = match imm8 {
26524        0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
26525        1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
26526        2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
26527        3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
26528        _ => unreachable_unchecked(),
26529    };
26530    transmute(r)
26531}
26532
26533/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26534///
26535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
26536#[inline]
26537#[target_feature(enable = "avx512f,avx512vl")]
26538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26539#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26540#[rustc_legacy_const_generics(4)]
26541pub unsafe fn _mm256_mask_alignr_epi64<const IMM8: i32>(
26542    src: __m256i,
26543    k: __mmask8,
26544    a: __m256i,
26545    b: __m256i,
26546) -> __m256i {
26547    static_assert_uimm_bits!(IMM8, 8);
26548    let r = _mm256_alignr_epi64::<IMM8>(a, b);
26549    transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
26550}
26551
26552/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26553///
26554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
26555#[inline]
26556#[target_feature(enable = "avx512f,avx512vl")]
26557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26558#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26559#[rustc_legacy_const_generics(3)]
26560pub unsafe fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
26561    k: __mmask8,
26562    a: __m256i,
26563    b: __m256i,
26564) -> __m256i {
26565    static_assert_uimm_bits!(IMM8, 8);
26566    let r = _mm256_alignr_epi64::<IMM8>(a, b);
26567    transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
26568}
26569
26570/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
26571///
26572/// <div class="warning">Only the lowest <strong>bit</strong> of the immediate <code>imm8</code> is used (shift at maximum by 8 bytes)!</div>
26573///
26574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
26575#[inline]
26576#[target_feature(enable = "avx512f,avx512vl")]
26577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26578#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
26579#[rustc_legacy_const_generics(2)]
26580pub unsafe fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
26581    static_assert_uimm_bits!(IMM8, 8);
26582    let imm8: i32 = IMM8 % 2;
26583    let r: i64x2 = match imm8 {
26584        0 => simd_shuffle!(a, b, [2, 3]),
26585        1 => simd_shuffle!(a, b, [3, 0]),
26586        _ => unreachable_unchecked(),
26587    };
26588    transmute(r)
26589}
26590
26591/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26592///
26593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
26594#[inline]
26595#[target_feature(enable = "avx512f,avx512vl")]
26596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26597#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26598#[rustc_legacy_const_generics(4)]
26599pub unsafe fn _mm_mask_alignr_epi64<const IMM8: i32>(
26600    src: __m128i,
26601    k: __mmask8,
26602    a: __m128i,
26603    b: __m128i,
26604) -> __m128i {
26605    static_assert_uimm_bits!(IMM8, 8);
26606    let r = _mm_alignr_epi64::<IMM8>(a, b);
26607    transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
26608}
26609
26610/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26611///
26612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
26613#[inline]
26614#[target_feature(enable = "avx512f,avx512vl")]
26615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26616#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
26617#[rustc_legacy_const_generics(3)]
26618pub unsafe fn _mm_maskz_alignr_epi64<const IMM8: i32>(
26619    k: __mmask8,
26620    a: __m128i,
26621    b: __m128i,
26622) -> __m128i {
26623    static_assert_uimm_bits!(IMM8, 8);
26624    let r = _mm_alignr_epi64::<IMM8>(a, b);
26625    transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
26626}
26627
26628/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
26629///
26630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
26631#[inline]
26632#[target_feature(enable = "avx512f")]
26633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26634#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
26635pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
26636    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
26637}
26638
26639/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26640///
26641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
26642#[inline]
26643#[target_feature(enable = "avx512f")]
26644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26645#[cfg_attr(test, assert_instr(vpandd))]
26646pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26647    let and = _mm512_and_epi32(a, b).as_i32x16();
26648    transmute(simd_select_bitmask(k, and, src.as_i32x16()))
26649}
26650
26651/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26652///
26653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
26654#[inline]
26655#[target_feature(enable = "avx512f")]
26656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26657#[cfg_attr(test, assert_instr(vpandd))]
26658pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26659    let and = _mm512_and_epi32(a, b).as_i32x16();
26660    transmute(simd_select_bitmask(k, and, i32x16::ZERO))
26661}
26662
26663/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26664///
26665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
26666#[inline]
26667#[target_feature(enable = "avx512f,avx512vl")]
26668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26669#[cfg_attr(test, assert_instr(vpandd))]
26670pub unsafe fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26671    let and = simd_and(a.as_i32x8(), b.as_i32x8());
26672    transmute(simd_select_bitmask(k, and, src.as_i32x8()))
26673}
26674
26675/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26676///
26677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
26678#[inline]
26679#[target_feature(enable = "avx512f,avx512vl")]
26680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26681#[cfg_attr(test, assert_instr(vpandd))]
26682pub unsafe fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26683    let and = simd_and(a.as_i32x8(), b.as_i32x8());
26684    transmute(simd_select_bitmask(k, and, i32x8::ZERO))
26685}
26686
26687/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26688///
26689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
26690#[inline]
26691#[target_feature(enable = "avx512f,avx512vl")]
26692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26693#[cfg_attr(test, assert_instr(vpandd))]
26694pub unsafe fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26695    let and = simd_and(a.as_i32x4(), b.as_i32x4());
26696    transmute(simd_select_bitmask(k, and, src.as_i32x4()))
26697}
26698
26699/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26700///
26701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
26702#[inline]
26703#[target_feature(enable = "avx512f,avx512vl")]
26704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26705#[cfg_attr(test, assert_instr(vpandd))]
26706pub unsafe fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26707    let and = simd_and(a.as_i32x4(), b.as_i32x4());
26708    transmute(simd_select_bitmask(k, and, i32x4::ZERO))
26709}
26710
26711/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
26712///
26713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
26714#[inline]
26715#[target_feature(enable = "avx512f")]
26716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26717#[cfg_attr(test, assert_instr(vpandq))]
26718pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
26719    transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
26720}
26721
26722/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26723///
26724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
26725#[inline]
26726#[target_feature(enable = "avx512f")]
26727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26728#[cfg_attr(test, assert_instr(vpandq))]
26729pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26730    let and = _mm512_and_epi64(a, b).as_i64x8();
26731    transmute(simd_select_bitmask(k, and, src.as_i64x8()))
26732}
26733
26734/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26735///
26736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
26737#[inline]
26738#[target_feature(enable = "avx512f")]
26739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26740#[cfg_attr(test, assert_instr(vpandq))]
26741pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26742    let and = _mm512_and_epi64(a, b).as_i64x8();
26743    transmute(simd_select_bitmask(k, and, i64x8::ZERO))
26744}
26745
26746/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26747///
26748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
26749#[inline]
26750#[target_feature(enable = "avx512f,avx512vl")]
26751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26752#[cfg_attr(test, assert_instr(vpandq))]
26753pub unsafe fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26754    let and = simd_and(a.as_i64x4(), b.as_i64x4());
26755    transmute(simd_select_bitmask(k, and, src.as_i64x4()))
26756}
26757
26758/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26759///
26760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
26761#[inline]
26762#[target_feature(enable = "avx512f,avx512vl")]
26763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26764#[cfg_attr(test, assert_instr(vpandq))]
26765pub unsafe fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26766    let and = simd_and(a.as_i64x4(), b.as_i64x4());
26767    transmute(simd_select_bitmask(k, and, i64x4::ZERO))
26768}
26769
26770/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26771///
26772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
26773#[inline]
26774#[target_feature(enable = "avx512f,avx512vl")]
26775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26776#[cfg_attr(test, assert_instr(vpandq))]
26777pub unsafe fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26778    let and = simd_and(a.as_i64x2(), b.as_i64x2());
26779    transmute(simd_select_bitmask(k, and, src.as_i64x2()))
26780}
26781
26782/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26783///
26784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
26785#[inline]
26786#[target_feature(enable = "avx512f,avx512vl")]
26787#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26788#[cfg_attr(test, assert_instr(vpandq))]
26789pub unsafe fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26790    let and = simd_and(a.as_i64x2(), b.as_i64x2());
26791    transmute(simd_select_bitmask(k, and, i64x2::ZERO))
26792}
26793
26794/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
26795///
26796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
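///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available
/// and the call is made from an `unsafe` block):
///
/// ```ignore
/// let a = _mm512_set1_epi64(0x00FF_00FF_00FF_00FF);
/// let b = _mm512_set1_epi64(0x0F0F_0F0F_0F0F_0F0F);
/// // The AND is performed on the whole 512-bit register, independent of element width.
/// let r = _mm512_and_si512(a, b); // every 64-bit lane holds 0x000F_000F_000F_000F
/// ```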
26797#[inline]
26798#[target_feature(enable = "avx512f")]
26799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26800#[cfg_attr(test, assert_instr(vpandq))]
26801pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
26802    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
26803}
26804
26805/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
26806///
26807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
26808#[inline]
26809#[target_feature(enable = "avx512f")]
26810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26811#[cfg_attr(test, assert_instr(vporq))]
26812pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
26813    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
26814}
26815
26816/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26817///
26818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
26819#[inline]
26820#[target_feature(enable = "avx512f")]
26821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26822#[cfg_attr(test, assert_instr(vpord))]
26823pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26824    let or = _mm512_or_epi32(a, b).as_i32x16();
26825    transmute(simd_select_bitmask(k, or, src.as_i32x16()))
26826}
26827
26828/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26829///
26830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
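///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available
/// and the call is made from an `unsafe` block):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b0101);
/// let b = _mm512_set1_epi32(0b0011);
/// // Lanes whose mask bit is set hold a | b (= 0b0111); every other lane is zeroed.
/// let r = _mm512_maskz_or_epi32(0b10101010_10101010, a, b);
/// ```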
26831#[inline]
26832#[target_feature(enable = "avx512f")]
26833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26834#[cfg_attr(test, assert_instr(vpord))]
26835pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26836    let or = _mm512_or_epi32(a, b).as_i32x16();
26837    transmute(simd_select_bitmask(k, or, i32x16::ZERO))
26838}
26839
26840/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
26841///
26842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
26843#[inline]
26844#[target_feature(enable = "avx512f,avx512vl")]
26845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26846#[cfg_attr(test, assert_instr(vor))] //should be vpord
26847pub unsafe fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
26848    transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
26849}
26850
26851/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
26854#[inline]
26855#[target_feature(enable = "avx512f,avx512vl")]
26856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26857#[cfg_attr(test, assert_instr(vpord))]
26858pub unsafe fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26859    let or = _mm256_or_epi32(a, b).as_i32x8();
26860    transmute(simd_select_bitmask(k, or, src.as_i32x8()))
26861}
26862
26863/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26864///
26865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
26866#[inline]
26867#[target_feature(enable = "avx512f,avx512vl")]
26868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26869#[cfg_attr(test, assert_instr(vpord))]
26870pub unsafe fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26871    let or = _mm256_or_epi32(a, b).as_i32x8();
26872    transmute(simd_select_bitmask(k, or, i32x8::ZERO))
26873}
26874
26875/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
26876///
26877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
26878#[inline]
26879#[target_feature(enable = "avx512f,avx512vl")]
26880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26881#[cfg_attr(test, assert_instr(vor))] //should be vpord
26882pub unsafe fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
26883    transmute(simd_or(a.as_i32x4(), b.as_i32x4()))
26884}
26885
26886/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26887///
26888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
26889#[inline]
26890#[target_feature(enable = "avx512f,avx512vl")]
26891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26892#[cfg_attr(test, assert_instr(vpord))]
26893pub unsafe fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26894    let or = _mm_or_epi32(a, b).as_i32x4();
26895    transmute(simd_select_bitmask(k, or, src.as_i32x4()))
26896}
26897
26898/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26899///
26900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
26901#[inline]
26902#[target_feature(enable = "avx512f,avx512vl")]
26903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26904#[cfg_attr(test, assert_instr(vpord))]
26905pub unsafe fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26906    let or = _mm_or_epi32(a, b).as_i32x4();
26907    transmute(simd_select_bitmask(k, or, i32x4::ZERO))
26908}
26909
26910/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
26911///
26912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
26913#[inline]
26914#[target_feature(enable = "avx512f")]
26915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26916#[cfg_attr(test, assert_instr(vporq))]
26917pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
26918    transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
26919}
26920
26921/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26927#[cfg_attr(test, assert_instr(vporq))]
26928pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26929    let or = _mm512_or_epi64(a, b).as_i64x8();
26930    transmute(simd_select_bitmask(k, or, src.as_i64x8()))
26931}
26932
26933/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26934///
26935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
26936#[inline]
26937#[target_feature(enable = "avx512f")]
26938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26939#[cfg_attr(test, assert_instr(vporq))]
26940pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26941    let or = _mm512_or_epi64(a, b).as_i64x8();
26942    transmute(simd_select_bitmask(k, or, i64x8::ZERO))
26943}
26944
26945/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
26946///
26947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
26948#[inline]
26949#[target_feature(enable = "avx512f,avx512vl")]
26950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26951#[cfg_attr(test, assert_instr(vor))] //should be vporq
26952pub unsafe fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
26953    transmute(simd_or(a.as_i64x4(), b.as_i64x4()))
26954}
26955
26956/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26957///
26958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
26959#[inline]
26960#[target_feature(enable = "avx512f,avx512vl")]
26961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26962#[cfg_attr(test, assert_instr(vporq))]
26963pub unsafe fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26964    let or = _mm256_or_epi64(a, b).as_i64x4();
26965    transmute(simd_select_bitmask(k, or, src.as_i64x4()))
26966}
26967
26968/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26969///
26970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
26971#[inline]
26972#[target_feature(enable = "avx512f,avx512vl")]
26973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26974#[cfg_attr(test, assert_instr(vporq))]
26975pub unsafe fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26976    let or = _mm256_or_epi64(a, b).as_i64x4();
26977    transmute(simd_select_bitmask(k, or, i64x4::ZERO))
26978}
26979
26980/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
26981///
26982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
26983#[inline]
26984#[target_feature(enable = "avx512f,avx512vl")]
26985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26986#[cfg_attr(test, assert_instr(vor))] //should be vporq
26987pub unsafe fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
26988    transmute(simd_or(a.as_i64x2(), b.as_i64x2()))
26989}
26990
26991/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26992///
26993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
26994#[inline]
26995#[target_feature(enable = "avx512f,avx512vl")]
26996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26997#[cfg_attr(test, assert_instr(vporq))]
26998pub unsafe fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26999    let or = _mm_or_epi64(a, b).as_i64x2();
27000    transmute(simd_select_bitmask(k, or, src.as_i64x2()))
27001}
27002
27003/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27004///
27005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
27006#[inline]
27007#[target_feature(enable = "avx512f,avx512vl")]
27008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27009#[cfg_attr(test, assert_instr(vporq))]
27010pub unsafe fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27011    let or = _mm_or_epi64(a, b).as_i64x2();
27012    transmute(simd_select_bitmask(k, or, i64x2::ZERO))
27013}
27014
27015/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
27016///
27017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
27018#[inline]
27019#[target_feature(enable = "avx512f")]
27020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27021#[cfg_attr(test, assert_instr(vporq))]
27022pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
27023    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
27024}
27025
27026/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
27027///
27028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
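///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available
/// and the call is made from an `unsafe` block):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x1234_5678);
/// // XOR with itself is the usual idiom for clearing a register,
/// // and XOR with all-ones flips every bit.
/// let zero = _mm512_xor_epi32(a, a);
/// let not_a = _mm512_xor_epi32(a, _mm512_set1_epi32(-1));
/// ```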
27029#[inline]
27030#[target_feature(enable = "avx512f")]
27031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27032#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
27033pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
27034    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
27035}
27036
27037/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27038///
27039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
27040#[inline]
27041#[target_feature(enable = "avx512f")]
27042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27043#[cfg_attr(test, assert_instr(vpxord))]
27044pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27045    let xor = _mm512_xor_epi32(a, b).as_i32x16();
27046    transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
27047}
27048
27049/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27050///
27051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
27052#[inline]
27053#[target_feature(enable = "avx512f")]
27054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27055#[cfg_attr(test, assert_instr(vpxord))]
27056pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27057    let xor = _mm512_xor_epi32(a, b).as_i32x16();
27058    transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
27059}
27060
27061/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
27062///
27063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
27064#[inline]
27065#[target_feature(enable = "avx512f,avx512vl")]
27066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27067#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
27068pub unsafe fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
27069    transmute(simd_xor(a.as_i32x8(), b.as_i32x8()))
27070}
27071
27072/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27073///
27074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
27075#[inline]
27076#[target_feature(enable = "avx512f,avx512vl")]
27077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27078#[cfg_attr(test, assert_instr(vpxord))]
27079pub unsafe fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27080    let xor = _mm256_xor_epi32(a, b).as_i32x8();
27081    transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
27082}
27083
27084/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27085///
27086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
27087#[inline]
27088#[target_feature(enable = "avx512f,avx512vl")]
27089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27090#[cfg_attr(test, assert_instr(vpxord))]
27091pub unsafe fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27092    let xor = _mm256_xor_epi32(a, b).as_i32x8();
27093    transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
27094}
27095
27096/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
27097///
27098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
27099#[inline]
27100#[target_feature(enable = "avx512f,avx512vl")]
27101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27102#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
27103pub unsafe fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
27104    transmute(simd_xor(a.as_i32x4(), b.as_i32x4()))
27105}
27106
27107/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27108///
27109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
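///
/// A minimal usage sketch (illustrative values; assumes `avx512f` and `avx512vl`
/// are available and the call is made from an `unsafe` block):
///
/// ```ignore
/// let src = _mm_set1_epi32(7);
/// let a = _mm_set1_epi32(0b0110);
/// let b = _mm_set1_epi32(0b0101);
/// // Elements 0 and 1 receive a ^ b (= 0b0011); elements 2 and 3 keep the value from `src`.
/// let r = _mm_mask_xor_epi32(src, 0b0011, a, b);
/// ```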
27110#[inline]
27111#[target_feature(enable = "avx512f,avx512vl")]
27112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27113#[cfg_attr(test, assert_instr(vpxord))]
27114pub unsafe fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27115    let xor = _mm_xor_epi32(a, b).as_i32x4();
27116    transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
27117}
27118
27119/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27120///
27121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
27122#[inline]
27123#[target_feature(enable = "avx512f,avx512vl")]
27124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27125#[cfg_attr(test, assert_instr(vpxord))]
27126pub unsafe fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27127    let xor = _mm_xor_epi32(a, b).as_i32x4();
27128    transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
27129}
27130
27131/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
27132///
27133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
27134#[inline]
27135#[target_feature(enable = "avx512f")]
27136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27137#[cfg_attr(test, assert_instr(vpxorq))]
27138pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
27139    transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
27140}
27141
27142/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27143///
27144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
27145#[inline]
27146#[target_feature(enable = "avx512f")]
27147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27148#[cfg_attr(test, assert_instr(vpxorq))]
27149pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27150    let xor = _mm512_xor_epi64(a, b).as_i64x8();
27151    transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
27152}
27153
27154/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27155///
27156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
27157#[inline]
27158#[target_feature(enable = "avx512f")]
27159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27160#[cfg_attr(test, assert_instr(vpxorq))]
27161pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27162    let xor = _mm512_xor_epi64(a, b).as_i64x8();
27163    transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
27164}
27165
27166/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
27169#[inline]
27170#[target_feature(enable = "avx512f,avx512vl")]
27171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27172#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
27173pub unsafe fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
27174    transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
27175}
27176
27177/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27178///
27179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
27180#[inline]
27181#[target_feature(enable = "avx512f,avx512vl")]
27182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27183#[cfg_attr(test, assert_instr(vpxorq))]
27184pub unsafe fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27185    let xor = _mm256_xor_epi64(a, b).as_i64x4();
27186    transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
27187}
27188
27189/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27190///
27191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
27192#[inline]
27193#[target_feature(enable = "avx512f,avx512vl")]
27194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27195#[cfg_attr(test, assert_instr(vpxorq))]
27196pub unsafe fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27197    let xor = _mm256_xor_epi64(a, b).as_i64x4();
27198    transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
27199}
27200
27201/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
27202///
27203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
27204#[inline]
27205#[target_feature(enable = "avx512f,avx512vl")]
27206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27207#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
27208pub unsafe fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
27209    transmute(simd_xor(a.as_i64x2(), b.as_i64x2()))
27210}
27211
27212/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27213///
27214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
27215#[inline]
27216#[target_feature(enable = "avx512f,avx512vl")]
27217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27218#[cfg_attr(test, assert_instr(vpxorq))]
27219pub unsafe fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27220    let xor = _mm_xor_epi64(a, b).as_i64x2();
27221    transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
27222}
27223
27224/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27225///
27226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
27227#[inline]
27228#[target_feature(enable = "avx512f,avx512vl")]
27229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27230#[cfg_attr(test, assert_instr(vpxorq))]
27231pub unsafe fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27232    let xor = _mm_xor_epi64(a, b).as_i64x2();
27233    transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
27234}
27235
27236/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
27237///
27238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
27239#[inline]
27240#[target_feature(enable = "avx512f")]
27241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27242#[cfg_attr(test, assert_instr(vpxorq))]
27243pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
27244    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
27245}
27246
27247/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
27248///
27249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
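///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available
/// and the call is made from an `unsafe` block). `andnot` computes `!a & b`,
/// i.e. it clears in `b` every bit that is set in `a`:
///
/// ```ignore
/// let mask = _mm512_set1_epi32(0b0011);
/// let data = _mm512_set1_epi32(0b0111);
/// let r = _mm512_andnot_epi32(mask, data); // every lane holds 0b0100
/// ```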
27250#[inline]
27251#[target_feature(enable = "avx512f")]
27252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27253#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
27254pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
27255    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
27256}
27257
27258/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27259///
27260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
27261#[inline]
27262#[target_feature(enable = "avx512f")]
27263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27264#[cfg_attr(test, assert_instr(vpandnd))]
27265pub unsafe fn _mm512_mask_andnot_epi32(
27266    src: __m512i,
27267    k: __mmask16,
27268    a: __m512i,
27269    b: __m512i,
27270) -> __m512i {
27271    let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
27272    transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
27273}
27274
27275/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27276///
27277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
27278#[inline]
27279#[target_feature(enable = "avx512f")]
27280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27281#[cfg_attr(test, assert_instr(vpandnd))]
27282pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27283    let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
27284    transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
27285}
27286
27287/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27288///
27289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
27290#[inline]
27291#[target_feature(enable = "avx512f,avx512vl")]
27292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27293#[cfg_attr(test, assert_instr(vpandnd))]
27294pub unsafe fn _mm256_mask_andnot_epi32(
27295    src: __m256i,
27296    k: __mmask8,
27297    a: __m256i,
27298    b: __m256i,
27299) -> __m256i {
27300    let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
27301    let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
27302    transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
27303}
27304
27305/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27306///
27307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
27308#[inline]
27309#[target_feature(enable = "avx512f,avx512vl")]
27310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27311#[cfg_attr(test, assert_instr(vpandnd))]
27312pub unsafe fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27313    let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
27314    let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
27315    transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
27316}
27317
27318/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27319///
27320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
27321#[inline]
27322#[target_feature(enable = "avx512f,avx512vl")]
27323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27324#[cfg_attr(test, assert_instr(vpandnd))]
27325pub unsafe fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27326    let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
27327    let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
27328    transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
27329}
27330
27331/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27332///
27333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
27334#[inline]
27335#[target_feature(enable = "avx512f,avx512vl")]
27336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27337#[cfg_attr(test, assert_instr(vpandnd))]
27338pub unsafe fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27339    let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
27340    let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
27341    transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
27342}
27343
27344/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
27345///
27346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
27347#[inline]
27348#[target_feature(enable = "avx512f")]
27349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27350#[cfg_attr(test, assert_instr(vpandnq))]
27351pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
27352    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
27353}
27354
27355/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27356///
27357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
27358#[inline]
27359#[target_feature(enable = "avx512f")]
27360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27361#[cfg_attr(test, assert_instr(vpandnq))]
27362pub unsafe fn _mm512_mask_andnot_epi64(
27363    src: __m512i,
27364    k: __mmask8,
27365    a: __m512i,
27366    b: __m512i,
27367) -> __m512i {
27368    let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
27369    transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
27370}
27371
27372/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27373///
27374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
27375#[inline]
27376#[target_feature(enable = "avx512f")]
27377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27378#[cfg_attr(test, assert_instr(vpandnq))]
27379pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27380    let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
27381    transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
27382}
27383
27384/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27385///
27386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
27387#[inline]
27388#[target_feature(enable = "avx512f,avx512vl")]
27389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27390#[cfg_attr(test, assert_instr(vpandnq))]
27391pub unsafe fn _mm256_mask_andnot_epi64(
27392    src: __m256i,
27393    k: __mmask8,
27394    a: __m256i,
27395    b: __m256i,
27396) -> __m256i {
27397    let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
27398    let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
27399    transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
27400}
27401
27402/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27403///
27404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
27405#[inline]
27406#[target_feature(enable = "avx512f,avx512vl")]
27407#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27408#[cfg_attr(test, assert_instr(vpandnq))]
27409pub unsafe fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27410    let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
27411    let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
27412    transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
27413}
27414
27415/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27416///
27417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
27418#[inline]
27419#[target_feature(enable = "avx512f,avx512vl")]
27420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27421#[cfg_attr(test, assert_instr(vpandnq))]
27422pub unsafe fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27423    let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
27424    let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
27425    transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
27426}
27427
27428/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27429///
27430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
27431#[inline]
27432#[target_feature(enable = "avx512f,avx512vl")]
27433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27434#[cfg_attr(test, assert_instr(vpandnq))]
27435pub unsafe fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27436    let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
27437    let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
27438    transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
27439}
27440
27441/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
27442///
27443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
27444#[inline]
27445#[target_feature(enable = "avx512f")]
27446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27447#[cfg_attr(test, assert_instr(vpandnq))]
27448pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
27449    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
27450}
27451
27452/// Convert 16-bit mask a into an integer value, and store the result in dst.
27453///
27454/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
27455#[inline]
27456#[target_feature(enable = "avx512f")]
27457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27458pub unsafe fn _cvtmask16_u32(a: __mmask16) -> u32 {
27459    a as u32
27460}
27461
27462/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
27463///
27464/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
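///
/// A minimal round-trip sketch (assumes `avx512f` is available and the calls
/// are made from an `unsafe` block):
///
/// ```ignore
/// let k = _cvtu32_mask16(0b10100000_00000101);
/// assert_eq!(_cvtmask16_u32(k), 0b10100000_00000101);
/// ```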
27465#[inline]
27466#[target_feature(enable = "avx512f")]
27467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27468pub unsafe fn _cvtu32_mask16(a: u32) -> __mmask16 {
27469    a as __mmask16
27470}
27471
27472/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
27473///
27474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
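///
/// A minimal usage sketch (assumes `avx512f` is available and the call is made
/// from an `unsafe` block):
///
/// ```ignore
/// let a: __mmask16 = 0b11110000_10101010;
/// let b: __mmask16 = 0b10101010_11110000;
/// let k = _kand_mask16(a, b); // 0b10100000_10100000
/// ```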
27475#[inline]
27476#[target_feature(enable = "avx512f")]
27477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27478#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
27479pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27480    a & b
27481}
27482
27483/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
27484///
27485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
27486#[inline]
27487#[target_feature(enable = "avx512f")]
27488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27489#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
27490pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
27491    a & b
27492}
27493
27494/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
27495///
27496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
27497#[inline]
27498#[target_feature(enable = "avx512f")]
27499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27500#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
27501pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27502    a | b
27503}
27504
27505/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
27506///
27507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
27508#[inline]
27509#[target_feature(enable = "avx512f")]
27510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27511#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
27512pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
27513    a | b
27514}
27515
27516/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
27517///
27518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
27519#[inline]
27520#[target_feature(enable = "avx512f")]
27521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27522#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
27523pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27524    a ^ b
27525}
27526
27527/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
27528///
27529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
27530#[inline]
27531#[target_feature(enable = "avx512f")]
27532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27533#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
27534pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
27535    a ^ b
27536}
27537
27538/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
27539///
27540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
27541#[inline]
27542#[target_feature(enable = "avx512f")]
27543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27544pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 {
27545    a ^ 0b11111111_11111111
27546}
27547
27548/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
27549///
27550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
27551#[inline]
27552#[target_feature(enable = "avx512f")]
27553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27554pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 {
27555    a ^ 0b11111111_11111111
27556}
27557
27558/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
27559///
27560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
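///
/// A minimal usage sketch (assumes `avx512f` is available and the call is made
/// from an `unsafe` block). `kandn` computes `!a & b`, clearing in `b` the bits
/// that are already set in `a`:
///
/// ```ignore
/// let a: __mmask16 = 0b00000000_00001111;
/// let b: __mmask16 = 0b00000000_11111111;
/// let k = _kandn_mask16(a, b); // 0b00000000_11110000
/// ```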
27561#[inline]
27562#[target_feature(enable = "avx512f")]
27563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27564#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
27565pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27566    _mm512_kand(_mm512_knot(a), b)
27567}
27568
27569/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
27570///
27571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
27572#[inline]
27573#[target_feature(enable = "avx512f")]
27574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27575#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
27576pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
27577    _mm512_kand(_mm512_knot(a), b)
27578}
27579
27580/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
27581///
27582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
27583#[inline]
27584#[target_feature(enable = "avx512f")]
27585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27586#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
27587pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
27588    _mm512_knot(_mm512_kxor(a, b))
27589}
27590
27591/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
27592///
27593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
27594#[inline]
27595#[target_feature(enable = "avx512f")]
27596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27597#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
27598pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
27599    _mm512_knot(_mm512_kxor(a, b))
27600}
27601
27602/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
27603/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
27604///
27605/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
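///
/// A minimal usage sketch (assumes `avx512f` is available and the call is made
/// from an `unsafe` block):
///
/// ```ignore
/// let mut all_ones: u8 = 0;
/// let a: __mmask16 = 0x00ff;
/// let b: __mmask16 = 0xff00;
/// // a | b is all ones, so `all_ones` becomes 1 and the returned "all zeros" flag is 0.
/// let all_zeros = _kortest_mask16_u8(a, b, &mut all_ones);
/// assert_eq!((all_zeros, all_ones), (0, 1));
/// ```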
27606#[inline]
27607#[target_feature(enable = "avx512f")]
27608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27609pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
27610    let tmp = _kor_mask16(a, b);
27611    *all_ones = (tmp == 0xffff) as u8;
27612    (tmp == 0) as u8
27613}
27614
27615/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
27616/// store 0 in dst.
27617///
27618/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
27619#[inline]
27620#[target_feature(enable = "avx512f")]
27621#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27622pub unsafe fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
27623    (_kor_mask16(a, b) == 0xffff) as u8
27624}
27625
27626/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
27627/// store 0 in dst.
27628///
27629/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
27630#[inline]
27631#[target_feature(enable = "avx512f")]
27632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27633pub unsafe fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
27634    (_kor_mask16(a, b) == 0) as u8
27635}
27636
27637/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
27638///
27639/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
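///
/// A minimal usage sketch (assumes `avx512f` is available and the call is made
/// from an `unsafe` block):
///
/// ```ignore
/// let a: __mmask16 = 0b00000000_00001111;
/// let k = _kshiftli_mask16::<4>(a); // 0b00000000_11110000
/// ```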
27640#[inline]
27641#[target_feature(enable = "avx512f")]
27642#[rustc_legacy_const_generics(1)]
27643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27644pub unsafe fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
27645    a << COUNT
27646}
27647
27648/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
27649///
27650/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
27651#[inline]
27652#[target_feature(enable = "avx512f")]
27653#[rustc_legacy_const_generics(1)]
27654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27655pub unsafe fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
27656    a >> COUNT
27657}
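
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing the
// const-generic shift count used by the two mask-shift intrinsics above. Bits shifted past
// either end are discarded and zeros are shifted in. Values are made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn kshift_mask16_example() {
    let a: __mmask16 = 0b0000_0000_1111_0001;
    assert_eq!(_kshiftli_mask16::<4>(a), 0b0000_1111_0001_0000);
    assert_eq!(_kshiftri_mask16::<4>(a), 0b0000_0000_0000_1111);
}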
27658
27659/// Load 16-bit mask from memory
27660///
27661/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
27662#[inline]
27663#[target_feature(enable = "avx512f")]
27664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27665pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
27666    *mem_addr
27667}
27668
27669/// Store 16-bit mask to memory
27670///
27671/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
27672#[inline]
27673#[target_feature(enable = "avx512f")]
27674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27675pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
27676    *mem_addr = a;
27677}
27678
27679/// Copy 16-bit mask a to k.
27680///
27681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
27682#[inline]
27683#[target_feature(enable = "avx512f")]
27684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27685#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
27686pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 {
27687    a
27688}
27689
27690/// Converts integer mask into bitmask, storing the result in dst.
27691///
27692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
27693#[inline]
27694#[target_feature(enable = "avx512f")]
27695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27696pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 {
27697    mask as u16
27698}
27699
27700/// Converts bit mask k1 into an integer value, storing the results in dst.
27701///
27702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
27703#[inline]
27704#[target_feature(enable = "avx512f")]
27705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27706#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
27707pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 {
27708    k1 as i32
27709}
27710
27711/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
27712///
27713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
27714#[inline]
27715#[target_feature(enable = "avx512f")]
27716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27717#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
27718pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
27719    ((a & 0xff) << 8) | (b & 0xff)
27720}
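
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing that
// `_mm512_kunpackb` places the low byte of `a` in the high byte of the result and the low
// byte of `b` in the low byte; the upper bytes of both inputs are ignored. Values are made
// up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn kunpackb_example() {
    let a: __mmask16 = 0xff_b3; // only the low byte (0xb3) matters
    let b: __mmask16 = 0xff_65; // only the low byte (0x65) matters
    assert_eq!(_mm512_kunpackb(a, b), 0xb365);
}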
27721
27722/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
27723///
27724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
27725#[inline]
27726#[target_feature(enable = "avx512f")]
27727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27728#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
27729pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
27730    let r = (a | b) == 0b11111111_11111111;
27731    r as i32
27732}
27733
27734/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
27735///
27736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
27737#[inline]
27738#[target_feature(enable = "avx512f")]
27739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27740#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
27741pub unsafe fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
27742    let r = (a | b) == 0;
27743    r as i32
27744}
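
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing the
// carry-flag style ("all ones") and zero-flag style ("all zeros") results of the two OR-test
// intrinsics above. The mask values are made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn kortest_example() {
    assert_eq!(_mm512_kortestc(0xf0f0, 0x0f0f), 1); // OR is all ones
    assert_eq!(_mm512_kortestz(0x0000, 0x0000), 1); // OR is zero
    assert_eq!(_mm512_kortestc(0x0001, 0x0000), 0);
    assert_eq!(_mm512_kortestz(0x0001, 0x0000), 0);
}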
27745
27746/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27747///
27748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
27749#[inline]
27750#[target_feature(enable = "avx512f")]
27751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27752#[cfg_attr(test, assert_instr(vptestmd))]
27753pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
27754    let and = _mm512_and_epi32(a, b);
27755    let zero = _mm512_setzero_si512();
27756    _mm512_cmpneq_epi32_mask(and, zero)
27757}
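
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing how
// `_mm512_test_epi32_mask` sets a mask bit for every 32-bit lane where `a & b` is non-zero.
// The vectors below are made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn test_epi32_mask_example() {
    let a = _mm512_set1_epi32(0b0110);
    // Lane 0 of `b` shares a bit with `a` (0b0010); the remaining lanes (0b1001) do not.
    let b = _mm512_set_epi32(
        0b1001, 0b1001, 0b1001, 0b1001, 0b1001, 0b1001, 0b1001, 0b1001, 0b1001, 0b1001, 0b1001,
        0b1001, 0b1001, 0b1001, 0b1001, 0b0010,
    );
    assert_eq!(_mm512_test_epi32_mask(a, b), 0b0000_0000_0000_0001);
}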
27758
27759/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27760///
27761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
27762#[inline]
27763#[target_feature(enable = "avx512f")]
27764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27765#[cfg_attr(test, assert_instr(vptestmd))]
27766pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
27767    let and = _mm512_and_epi32(a, b);
27768    let zero = _mm512_setzero_si512();
27769    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
27770}
27771
27772/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27773///
27774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
27775#[inline]
27776#[target_feature(enable = "avx512f,avx512vl")]
27777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27778#[cfg_attr(test, assert_instr(vptestmd))]
27779pub unsafe fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
27780    let and = _mm256_and_si256(a, b);
27781    let zero = _mm256_setzero_si256();
27782    _mm256_cmpneq_epi32_mask(and, zero)
27783}
27784
27785/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27786///
27787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
27788#[inline]
27789#[target_feature(enable = "avx512f,avx512vl")]
27790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27791#[cfg_attr(test, assert_instr(vptestmd))]
27792pub unsafe fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
27793    let and = _mm256_and_si256(a, b);
27794    let zero = _mm256_setzero_si256();
27795    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
27796}
27797
27798/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27799///
27800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
27801#[inline]
27802#[target_feature(enable = "avx512f,avx512vl")]
27803#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27804#[cfg_attr(test, assert_instr(vptestmd))]
27805pub unsafe fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
27806    let and = _mm_and_si128(a, b);
27807    let zero = _mm_setzero_si128();
27808    _mm_cmpneq_epi32_mask(and, zero)
27809}
27810
27811/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27812///
27813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
27814#[inline]
27815#[target_feature(enable = "avx512f,avx512vl")]
27816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27817#[cfg_attr(test, assert_instr(vptestmd))]
27818pub unsafe fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
27819    let and = _mm_and_si128(a, b);
27820    let zero = _mm_setzero_si128();
27821    _mm_mask_cmpneq_epi32_mask(k, and, zero)
27822}
27823
27824/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27825///
27826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
27827#[inline]
27828#[target_feature(enable = "avx512f")]
27829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27830#[cfg_attr(test, assert_instr(vptestmq))]
27831pub unsafe fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
27832    let and = _mm512_and_epi64(a, b);
27833    let zero = _mm512_setzero_si512();
27834    _mm512_cmpneq_epi64_mask(and, zero)
27835}
27836
27837/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27838///
27839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
27840#[inline]
27841#[target_feature(enable = "avx512f")]
27842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27843#[cfg_attr(test, assert_instr(vptestmq))]
27844pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
27845    let and = _mm512_and_epi64(a, b);
27846    let zero = _mm512_setzero_si512();
27847    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
27848}
27849
27850/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27851///
27852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
27853#[inline]
27854#[target_feature(enable = "avx512f,avx512vl")]
27855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27856#[cfg_attr(test, assert_instr(vptestmq))]
27857pub unsafe fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
27858    let and = _mm256_and_si256(a, b);
27859    let zero = _mm256_setzero_si256();
27860    _mm256_cmpneq_epi64_mask(and, zero)
27861}
27862
27863/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27864///
27865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
27866#[inline]
27867#[target_feature(enable = "avx512f,avx512vl")]
27868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27869#[cfg_attr(test, assert_instr(vptestmq))]
27870pub unsafe fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
27871    let and = _mm256_and_si256(a, b);
27872    let zero = _mm256_setzero_si256();
27873    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
27874}
27875
27876/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
27877///
27878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
27879#[inline]
27880#[target_feature(enable = "avx512f,avx512vl")]
27881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27882#[cfg_attr(test, assert_instr(vptestmq))]
27883pub unsafe fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
27884    let and = _mm_and_si128(a, b);
27885    let zero = _mm_setzero_si128();
27886    _mm_cmpneq_epi64_mask(and, zero)
27887}
27888
27889/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
27890///
27891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
27892#[inline]
27893#[target_feature(enable = "avx512f,avx512vl")]
27894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27895#[cfg_attr(test, assert_instr(vptestmq))]
27896pub unsafe fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
27897    let and = _mm_and_si128(a, b);
27898    let zero = _mm_setzero_si128();
27899    _mm_mask_cmpneq_epi64_mask(k, and, zero)
27900}
27901
27902/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27903///
27904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
27905#[inline]
27906#[target_feature(enable = "avx512f")]
27907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27908#[cfg_attr(test, assert_instr(vptestnmd))]
27909pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
27910    let and = _mm512_and_epi32(a, b);
27911    let zero = _mm512_setzero_si512();
27912    _mm512_cmpeq_epi32_mask(and, zero)
27913}
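
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing that
// `_mm512_testn_epi32_mask` is the lane-wise complement of `_mm512_test_epi32_mask` for the
// same inputs. Values are made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn testn_epi32_mask_example() {
    let a = _mm512_set1_epi32(0b0110);
    let b = _mm512_set1_epi32(0b1001);
    // `a & b` is zero in every lane, so every `testn` bit is set and every `test` bit is clear.
    assert_eq!(_mm512_testn_epi32_mask(a, b), 0xffff);
    assert_eq!(_mm512_test_epi32_mask(a, b), 0);
}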
27914
27915/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27916///
27917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
27918#[inline]
27919#[target_feature(enable = "avx512f")]
27920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27921#[cfg_attr(test, assert_instr(vptestnmd))]
27922pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
27923    let and = _mm512_and_epi32(a, b);
27924    let zero = _mm512_setzero_si512();
27925    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
27926}
27927
27928/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27929///
27930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
27931#[inline]
27932#[target_feature(enable = "avx512f,avx512vl")]
27933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27934#[cfg_attr(test, assert_instr(vptestnmd))]
27935pub unsafe fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
27936    let and = _mm256_and_si256(a, b);
27937    let zero = _mm256_setzero_si256();
27938    _mm256_cmpeq_epi32_mask(and, zero)
27939}
27940
27941/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27942///
27943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
27944#[inline]
27945#[target_feature(enable = "avx512f,avx512vl")]
27946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27947#[cfg_attr(test, assert_instr(vptestnmd))]
27948pub unsafe fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
27949    let and = _mm256_and_si256(a, b);
27950    let zero = _mm256_setzero_si256();
27951    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
27952}
27953
27954/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27955///
27956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
27957#[inline]
27958#[target_feature(enable = "avx512f,avx512vl")]
27959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27960#[cfg_attr(test, assert_instr(vptestnmd))]
27961pub unsafe fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
27962    let and = _mm_and_si128(a, b);
27963    let zero = _mm_setzero_si128();
27964    _mm_cmpeq_epi32_mask(and, zero)
27965}
27966
27967/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27968///
27969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
27970#[inline]
27971#[target_feature(enable = "avx512f,avx512vl")]
27972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27973#[cfg_attr(test, assert_instr(vptestnmd))]
27974pub unsafe fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
27975    let and = _mm_and_si128(a, b);
27976    let zero = _mm_setzero_si128();
27977    _mm_mask_cmpeq_epi32_mask(k, and, zero)
27978}
27979
27980/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
27981///
27982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
27983#[inline]
27984#[target_feature(enable = "avx512f")]
27985#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27986#[cfg_attr(test, assert_instr(vptestnmq))]
27987pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
27988    let and = _mm512_and_epi64(a, b);
27989    let zero = _mm512_setzero_si512();
27990    _mm512_cmpeq_epi64_mask(and, zero)
27991}
27992
27993/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
27994///
27995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
27996#[inline]
27997#[target_feature(enable = "avx512f")]
27998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27999#[cfg_attr(test, assert_instr(vptestnmq))]
28000pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
28001    let and = _mm512_and_epi64(a, b);
28002    let zero = _mm512_setzero_si512();
28003    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
28004}
28005
28006/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
28007///
28008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
28009#[inline]
28010#[target_feature(enable = "avx512f,avx512vl")]
28011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28012#[cfg_attr(test, assert_instr(vptestnmq))]
28013pub unsafe fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
28014    let and = _mm256_and_si256(a, b);
28015    let zero = _mm256_setzero_si256();
28016    _mm256_cmpeq_epi64_mask(and, zero)
28017}
28018
28019/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
28020///
28021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
28022#[inline]
28023#[target_feature(enable = "avx512f,avx512vl")]
28024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28025#[cfg_attr(test, assert_instr(vptestnmq))]
28026pub unsafe fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
28027    let and = _mm256_and_si256(a, b);
28028    let zero = _mm256_setzero_si256();
28029    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
28030}
28031
28032/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
28033///
28034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
28035#[inline]
28036#[target_feature(enable = "avx512f,avx512vl")]
28037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28038#[cfg_attr(test, assert_instr(vptestnmq))]
28039pub unsafe fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
28040    let and = _mm_and_si128(a, b);
28041    let zero = _mm_setzero_si128();
28042    _mm_cmpeq_epi64_mask(and, zero)
28043}
28044
28045/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
28046///
28047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
28048#[inline]
28049#[target_feature(enable = "avx512f,avx512vl")]
28050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28051#[cfg_attr(test, assert_instr(vptestnmq))]
28052pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
28053    let and = _mm_and_si128(a, b);
28054    let zero = _mm_setzero_si128();
28055    _mm_mask_cmpeq_epi64_mask(k, and, zero)
28056}
28057
28058/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
28059///
28060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
28061///
28062/// # Safety of non-temporal stores
28063///
28064/// After using this intrinsic, but before any other access to the memory that this intrinsic
28065/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
28066/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
28067/// return.
28068///
28069/// See [`_mm_sfence`] for details.
28070#[inline]
28071#[target_feature(enable = "avx512f")]
28072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28073#[cfg_attr(test, assert_instr(vmovntps))]
28074#[allow(clippy::cast_ptr_alignment)]
28075pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
28076    crate::arch::asm!(
28077        vps!("vmovntps", ",{a}"),
28078        p = in(reg) mem_addr,
28079        a = in(zmm_reg) a,
28080        options(nostack, preserves_flags),
28081    );
28082}
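
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing the usage
// pattern described in the safety section above — a 64-byte-aligned destination, the
// non-temporal store, and the required `_mm_sfence` before the memory is touched again. The
// buffer type is made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn stream_ps_example() {
    #[repr(align(64))]
    struct Aligned([f32; 16]);

    let mut dst = Aligned([0.0; 16]);
    _mm512_stream_ps(dst.0.as_mut_ptr(), _mm512_set1_ps(1.0));
    // Fence before any other access to `dst`, per the safety section above.
    _mm_sfence();
    assert_eq!(dst.0[0], 1.0);
}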
28083
28084/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
28085///
28086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
28087///
28088/// # Safety of non-temporal stores
28089///
28090/// After using this intrinsic, but before any other access to the memory that this intrinsic
28091/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
28092/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
28093/// return.
28094///
28095/// See [`_mm_sfence`] for details.
28096#[inline]
28097#[target_feature(enable = "avx512f")]
28098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28099#[cfg_attr(test, assert_instr(vmovntpd))]
28100#[allow(clippy::cast_ptr_alignment)]
28101pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
28102    crate::arch::asm!(
28103        vps!("vmovntpd", ",{a}"),
28104        p = in(reg) mem_addr,
28105        a = in(zmm_reg) a,
28106        options(nostack, preserves_flags),
28107    );
28108}
28109
28110/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
28111///
28112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
28113///
28114/// # Safety of non-temporal stores
28115///
28116/// After using this intrinsic, but before any other access to the memory that this intrinsic
28117/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
28118/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
28119/// return.
28120///
28121/// See [`_mm_sfence`] for details.
28122#[inline]
28123#[target_feature(enable = "avx512f")]
28124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28125#[cfg_attr(test, assert_instr(vmovntdq))]
28126#[allow(clippy::cast_ptr_alignment)]
28127pub unsafe fn _mm512_stream_si512(mem_addr: *mut i32, a: __m512i) {
28128    crate::arch::asm!(
28129        vps!("vmovntdq", ",{a}"),
28130        p = in(reg) mem_addr,
28131        a = in(zmm_reg) a,
28132        options(nostack, preserves_flags),
28133    );
28134}
28135
28136/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
28137/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
28138/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
28139///
28140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
28141#[inline]
28142#[target_feature(enable = "avx512f")]
28143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28144pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
28145    let dst: __m512i;
28146    crate::arch::asm!(
28147        vpl!("vmovntdqa {a}"),
28148        a = out(zmm_reg) dst,
28149        p = in(reg) mem_addr,
28150        options(pure, readonly, nostack, preserves_flags),
28151    );
28152    dst
28153}
28154
28155/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
28156///
28157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
28158#[inline]
28159#[target_feature(enable = "avx512f")]
28160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28161pub unsafe fn _mm512_set_ps(
28162    e0: f32,
28163    e1: f32,
28164    e2: f32,
28165    e3: f32,
28166    e4: f32,
28167    e5: f32,
28168    e6: f32,
28169    e7: f32,
28170    e8: f32,
28171    e9: f32,
28172    e10: f32,
28173    e11: f32,
28174    e12: f32,
28175    e13: f32,
28176    e14: f32,
28177    e15: f32,
28178) -> __m512 {
28179    _mm512_setr_ps(
28180        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
28181    )
28182}
28183
28184/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
28185/// values in reverse order.
28186///
28187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
28188#[inline]
28189#[target_feature(enable = "avx512f")]
28190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28191pub unsafe fn _mm512_setr_ps(
28192    e0: f32,
28193    e1: f32,
28194    e2: f32,
28195    e3: f32,
28196    e4: f32,
28197    e5: f32,
28198    e6: f32,
28199    e7: f32,
28200    e8: f32,
28201    e9: f32,
28202    e10: f32,
28203    e11: f32,
28204    e12: f32,
28205    e13: f32,
28206    e14: f32,
28207    e15: f32,
28208) -> __m512 {
28209    let r = f32x16::new(
28210        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
28211    );
28212    transmute(r)
28213}
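
// Illustrative sketch (not part of the crate's API): a hypothetical helper contrasting the
// argument ordering of the two constructors above — `_mm512_set_ps` takes the highest element
// first, `_mm512_setr_ps` takes elements in memory order. Values are made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set_ps_ordering_example() {
    let hi_first = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    let lo_first = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // Both spell the same vector: element 0 is 0.0, element 15 is 15.0.
    assert_eq!(_mm512_cmpeq_ps_mask(hi_first, lo_first), 0xffff);
}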
28214
28215/// Broadcast 64-bit float `a` to all elements of `dst`.
28216///
28217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
28218#[inline]
28219#[target_feature(enable = "avx512f")]
28220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28221pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
28222    transmute(f64x8::splat(a))
28223}
28224
28225/// Broadcast 32-bit float `a` to all elements of `dst`.
28226///
28227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
28228#[inline]
28229#[target_feature(enable = "avx512f")]
28230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28231pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
28232    transmute(f32x16::splat(a))
28233}
28234
28235/// Sets packed 32-bit integers in `dst` with the supplied values.
28236///
28237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
28238#[inline]
28239#[target_feature(enable = "avx512f")]
28240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28241pub unsafe fn _mm512_set_epi32(
28242    e15: i32,
28243    e14: i32,
28244    e13: i32,
28245    e12: i32,
28246    e11: i32,
28247    e10: i32,
28248    e9: i32,
28249    e8: i32,
28250    e7: i32,
28251    e6: i32,
28252    e5: i32,
28253    e4: i32,
28254    e3: i32,
28255    e2: i32,
28256    e1: i32,
28257    e0: i32,
28258) -> __m512i {
28259    _mm512_setr_epi32(
28260        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
28261    )
28262}
28263
28264/// Broadcast 8-bit integer a to all elements of dst.
28265///
28266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
28267#[inline]
28268#[target_feature(enable = "avx512f")]
28269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28270pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i {
28271    transmute(i8x64::splat(a))
28272}
28273
28274/// Broadcast the low packed 16-bit integer from a to all elements of dst.
28275///
28276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
28277#[inline]
28278#[target_feature(enable = "avx512f")]
28279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28280pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i {
28281    transmute(i16x32::splat(a))
28282}
28283
28284/// Broadcast 32-bit integer `a` to all elements of `dst`.
28285///
28286/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
28287#[inline]
28288#[target_feature(enable = "avx512f")]
28289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28290pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
28291    transmute(i32x16::splat(a))
28292}
28293
28294/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28295///
28296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
28297#[inline]
28298#[target_feature(enable = "avx512f")]
28299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28300#[cfg_attr(test, assert_instr(vpbroadcastd))]
28301pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
28302    let r = _mm512_set1_epi32(a).as_i32x16();
28303    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
28304}
28305
28306/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28307///
28308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
28309#[inline]
28310#[target_feature(enable = "avx512f")]
28311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28312#[cfg_attr(test, assert_instr(vpbroadcastd))]
28313pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
28314    let r = _mm512_set1_epi32(a).as_i32x16();
28315    transmute(simd_select_bitmask(k, r, i32x16::ZERO))
28316}
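
// Illustrative sketch (not part of the crate's API): a hypothetical helper contrasting the
// writemask and zeromask variants above — unselected lanes come from `src` in the former and
// become zero in the latter. Values are made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_set1_epi32_example() {
    let src = _mm512_set1_epi32(7);
    let k: __mmask16 = 0b0000_0000_0000_0011; // only lanes 0 and 1 selected
    let merged = _mm512_mask_set1_epi32(src, k, 42);
    let zeroed = _mm512_maskz_set1_epi32(k, 42);
    // Lanes 0 and 1 hold 42 in both results; elsewhere `merged` holds 7 and `zeroed` holds 0.
    assert_eq!(_mm512_cmpeq_epi32_mask(merged, _mm512_set1_epi32(42)), 0b11);
    assert_eq!(_mm512_cmpeq_epi32_mask(zeroed, _mm512_set1_epi32(42)), 0b11);
    assert_eq!(_mm512_cmpeq_epi32_mask(merged, src), 0xfffc);
    assert_eq!(_mm512_cmpeq_epi32_mask(zeroed, _mm512_setzero_si512()), 0xfffc);
}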
28317
28318/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28319///
28320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
28321#[inline]
28322#[target_feature(enable = "avx512f,avx512vl")]
28323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28324#[cfg_attr(test, assert_instr(vpbroadcastd))]
28325pub unsafe fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
28326    let r = _mm256_set1_epi32(a).as_i32x8();
28327    transmute(simd_select_bitmask(k, r, src.as_i32x8()))
28328}
28329
28330/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28331///
28332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
28333#[inline]
28334#[target_feature(enable = "avx512f,avx512vl")]
28335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28336#[cfg_attr(test, assert_instr(vpbroadcastd))]
28337pub unsafe fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
28338    let r = _mm256_set1_epi32(a).as_i32x8();
28339    transmute(simd_select_bitmask(k, r, i32x8::ZERO))
28340}
28341
28342/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28343///
28344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
28345#[inline]
28346#[target_feature(enable = "avx512f,avx512vl")]
28347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28348#[cfg_attr(test, assert_instr(vpbroadcastd))]
28349pub unsafe fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
28350    let r = _mm_set1_epi32(a).as_i32x4();
28351    transmute(simd_select_bitmask(k, r, src.as_i32x4()))
28352}
28353
28354/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28355///
28356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
28357#[inline]
28358#[target_feature(enable = "avx512f,avx512vl")]
28359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28360#[cfg_attr(test, assert_instr(vpbroadcastd))]
28361pub unsafe fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
28362    let r = _mm_set1_epi32(a).as_i32x4();
28363    transmute(simd_select_bitmask(k, r, i32x4::ZERO))
28364}
28365
28366/// Broadcast 64-bit integer `a` to all elements of `dst`.
28367///
28368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
28369#[inline]
28370#[target_feature(enable = "avx512f")]
28371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28372pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
28373    transmute(i64x8::splat(a))
28374}
28375
28376/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28377///
28378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
28379#[inline]
28380#[target_feature(enable = "avx512f")]
28381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28382#[cfg_attr(test, assert_instr(vpbroadcastq))]
28383pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
28384    let r = _mm512_set1_epi64(a).as_i64x8();
28385    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
28386}
28387
28388/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28389///
28390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
28391#[inline]
28392#[target_feature(enable = "avx512f")]
28393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28394#[cfg_attr(test, assert_instr(vpbroadcastq))]
28395pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
28396    let r = _mm512_set1_epi64(a).as_i64x8();
28397    transmute(simd_select_bitmask(k, r, i64x8::ZERO))
28398}
28399
28400/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28401///
28402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
28403#[inline]
28404#[target_feature(enable = "avx512f,avx512vl")]
28405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28406#[cfg_attr(test, assert_instr(vpbroadcastq))]
28407pub unsafe fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
28408    let r = _mm256_set1_epi64x(a).as_i64x4();
28409    transmute(simd_select_bitmask(k, r, src.as_i64x4()))
28410}
28411
28412/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28413///
28414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
28415#[inline]
28416#[target_feature(enable = "avx512f,avx512vl")]
28417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28418#[cfg_attr(test, assert_instr(vpbroadcastq))]
28419pub unsafe fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
28420    let r = _mm256_set1_epi64x(a).as_i64x4();
28421    transmute(simd_select_bitmask(k, r, i64x4::ZERO))
28422}
28423
28424/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28425///
28426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
28427#[inline]
28428#[target_feature(enable = "avx512f,avx512vl")]
28429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28430#[cfg_attr(test, assert_instr(vpbroadcastq))]
28431pub unsafe fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
28432    let r = _mm_set1_epi64x(a).as_i64x2();
28433    transmute(simd_select_bitmask(k, r, src.as_i64x2()))
28434}
28435
28436/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28437///
28438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
28439#[inline]
28440#[target_feature(enable = "avx512f,avx512vl")]
28441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28442#[cfg_attr(test, assert_instr(vpbroadcastq))]
28443pub unsafe fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
28444    let r = _mm_set1_epi64x(a).as_i64x2();
28445    transmute(simd_select_bitmask(k, r, i64x2::ZERO))
28446}
28447
28448/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
28449///
28450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
28451#[inline]
28452#[target_feature(enable = "avx512f")]
28453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28454pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
28455    _mm512_set_epi64(d, c, b, a, d, c, b, a)
28456}
28457
28458/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
28459///
28460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
28461#[inline]
28462#[target_feature(enable = "avx512f")]
28463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28464pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
28465    _mm512_set_epi64(a, b, c, d, a, b, c, d)
28466}
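
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing the repeated
// 4-element pattern produced by the two constructors above, as implied by their bodies:
// `_mm512_set4_epi64(d, c, b, a)` repeats the pattern with `a` in the lowest lane, while
// `_mm512_setr4_epi64(d, c, b, a)` repeats it with `d` in the lowest lane. Values are made up
// for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set4_epi64_example() {
    let fwd = _mm512_set4_epi64(4, 3, 2, 1);
    let rev = _mm512_setr4_epi64(1, 2, 3, 4);
    // Both spell the lane pattern 1, 2, 3, 4 repeated twice, so every lane compares equal.
    assert_eq!(_mm512_cmpeq_epi64_mask(fwd, rev), 0xff);
}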
28467
28468/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
28469///
28470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
28471#[inline]
28472#[target_feature(enable = "avx512f")]
28473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28474#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28475pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28476    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
28477}
28478
28479/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28480///
28481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
28482#[inline]
28483#[target_feature(enable = "avx512f")]
28484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28485#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28486pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28487    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
28488}
28489
28490/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
28491///
28492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
28493#[inline]
28494#[target_feature(enable = "avx512f")]
28495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28496#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28497pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28498    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
28499}
28500
28501/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28502///
28503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
28504#[inline]
28505#[target_feature(enable = "avx512f")]
28506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28507#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28508pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28509    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
28510}
28511
28512/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
28513///
28514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
28515#[inline]
28516#[target_feature(enable = "avx512f")]
28517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28518#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28519pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28520    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
28521}
28522
28523/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28524///
28525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
28526#[inline]
28527#[target_feature(enable = "avx512f")]
28528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28529#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28530pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28531    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
28532}
28533
28534/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
28535///
28536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
28537#[inline]
28538#[target_feature(enable = "avx512f")]
28539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28540#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28541pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28542    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
28543}
28544
28545/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28546///
28547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
28548#[inline]
28549#[target_feature(enable = "avx512f")]
28550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28551#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28552pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28553    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
28554}
28555
28556/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
28557///
28558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
28559#[inline]
28560#[target_feature(enable = "avx512f")]
28561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28562#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28563pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28564    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
28565}
28566
28567/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28568///
28569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
28570#[inline]
28571#[target_feature(enable = "avx512f")]
28572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28573#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28574pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28575    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
28576}
28577
28578/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
28579///
28580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
28581#[inline]
28582#[target_feature(enable = "avx512f")]
28583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28584#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28585pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28586    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
28587}
28588
28589/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28590///
28591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
28592#[inline]
28593#[target_feature(enable = "avx512f")]
28594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28595#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28596pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28597    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
28598}
28599
28600/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28601///
28602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
28603#[inline]
28604#[target_feature(enable = "avx512f")]
28605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28606#[rustc_legacy_const_generics(2)]
28607#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28608pub unsafe fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
28609    static_assert_uimm_bits!(IMM8, 5);
28610    let neg_one = -1;
28611    let a = a.as_f32x16();
28612    let b = b.as_f32x16();
28613    let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
28614    transmute(r)
28615}
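
// Illustrative sketch (not part of the crate's API): a hypothetical helper showing that the
// named comparison intrinsics above are thin wrappers around `_mm512_cmp_ps_mask` with a
// predicate constant such as `_CMP_LT_OS`. Values are made up for demonstration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cmp_ps_mask_example() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // Every lane of `a` is less than the corresponding lane of `b`.
    assert_eq!(_mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b), 0xffff);
    assert_eq!(_mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b), _mm512_cmplt_ps_mask(a, b));
}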
28616
28617/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28618///
28619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
28620#[inline]
28621#[target_feature(enable = "avx512f")]
28622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28623#[rustc_legacy_const_generics(3)]
28624#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28625pub unsafe fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(
28626    k1: __mmask16,
28627    a: __m512,
28628    b: __m512,
28629) -> __mmask16 {
28630    static_assert_uimm_bits!(IMM8, 5);
28631    let a = a.as_f32x16();
28632    let b = b.as_f32x16();
28633    let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
28634    transmute(r)
28635}
28636
28637/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28638///
28639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
28640#[inline]
28641#[target_feature(enable = "avx512f,avx512vl")]
28642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28643#[rustc_legacy_const_generics(2)]
28644#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28645pub unsafe fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
28646    static_assert_uimm_bits!(IMM8, 5);
28647    let neg_one = -1;
28648    let a = a.as_f32x8();
28649    let b = b.as_f32x8();
28650    let r = vcmpps256(a, b, IMM8, neg_one);
28651    transmute(r)
28652}
28653
28654/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28655///
28656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
28657#[inline]
28658#[target_feature(enable = "avx512f,avx512vl")]
28659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28660#[rustc_legacy_const_generics(3)]
28661#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28662pub unsafe fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(
28663    k1: __mmask8,
28664    a: __m256,
28665    b: __m256,
28666) -> __mmask8 {
28667    static_assert_uimm_bits!(IMM8, 5);
28668    let a = a.as_f32x8();
28669    let b = b.as_f32x8();
28670    let r = vcmpps256(a, b, IMM8, k1 as i8);
28671    transmute(r)
28672}
28673
28674/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28675///
28676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
28677#[inline]
28678#[target_feature(enable = "avx512f,avx512vl")]
28679#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28680#[rustc_legacy_const_generics(2)]
28681#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28682pub unsafe fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
28683    static_assert_uimm_bits!(IMM8, 5);
28684    let neg_one = -1;
28685    let a = a.as_f32x4();
28686    let b = b.as_f32x4();
28687    let r = vcmpps128(a, b, IMM8, neg_one);
28688    transmute(r)
28689}
28690
28691/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28692///
28693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
28694#[inline]
28695#[target_feature(enable = "avx512f,avx512vl")]
28696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28697#[rustc_legacy_const_generics(3)]
28698#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28699pub unsafe fn _mm_mask_cmp_ps_mask<const IMM8: i32>(
28700    k1: __mmask8,
28701    a: __m128,
28702    b: __m128,
28703) -> __mmask8 {
28704    static_assert_uimm_bits!(IMM8, 5);
28705    let a = a.as_f32x4();
28706    let b = b.as_f32x4();
28707    let r = vcmpps128(a, b, IMM8, k1 as i8);
28708    transmute(r)
28709}
28710
28711/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
28712/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
28713///
28714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
28715#[inline]
28716#[target_feature(enable = "avx512f")]
28717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28718#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
28719#[rustc_legacy_const_generics(2, 3)]
28720pub unsafe fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
28721    a: __m512,
28722    b: __m512,
28723) -> __mmask16 {
28724    static_assert_uimm_bits!(IMM5, 5);
28725    static_assert_mantissas_sae!(SAE);
28726    let neg_one = -1;
28727    let a = a.as_f32x16();
28728    let b = b.as_f32x16();
28729    let r = vcmpps(a, b, IMM5, neg_one, SAE);
28730    transmute(r)
28731}
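
// Illustrative sketch, not part of the upstream source: the extra `SAE`
// parameter of the `round` form only controls exception reporting.
// `_MM_FROUND_NO_EXC` suppresses exceptions, while `_MM_FROUND_CUR_DIRECTION`
// matches the behaviour of `_mm512_cmp_ps_mask`. The helper name and values
// are invented.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_cmp_round_ps_mask_sae() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // a < b holds in all 16 lanes, so every bit of the mask is set.
    let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(m, 0xffff);
}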
28732
28733/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
28734/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
28735///
28736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
28737#[inline]
28738#[target_feature(enable = "avx512f")]
28739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28740#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
28741#[rustc_legacy_const_generics(3, 4)]
28742pub unsafe fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
28743    m: __mmask16,
28744    a: __m512,
28745    b: __m512,
28746) -> __mmask16 {
28747    static_assert_uimm_bits!(IMM5, 5);
28748    static_assert_mantissas_sae!(SAE);
28749    let a = a.as_f32x16();
28750    let b = b.as_f32x16();
28751    let r = vcmpps(a, b, IMM5, m as i16, SAE);
28752    transmute(r)
28753}
28754
28755/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
28756///
28757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
28758#[inline]
28759#[target_feature(enable = "avx512f")]
28760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28761#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28762pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28763    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
28764}
28765
28766/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28767///
28768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
28769#[inline]
28770#[target_feature(enable = "avx512f")]
28771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28772#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28773pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28774    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
28775}
28776
28777/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
28778///
28779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
28780#[inline]
28781#[target_feature(enable = "avx512f")]
28782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28783#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28784pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
28785    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
28786}
28787
28788/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28789///
28790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
28791#[inline]
28792#[target_feature(enable = "avx512f")]
28793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28794#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
28795pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
28796    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
28797}
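
// Illustrative sketch, not part of the upstream source: `cmpord` reports the
// lanes where neither input is NaN, `cmpunord` the lanes where at least one
// input is NaN, so for any pair of inputs the two masks are complementary.
// The helper name and values are invented.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_cmpord_vs_cmpunord_ps() {
    let ones = _mm512_set1_ps(1.0);
    let nans = _mm512_set1_ps(f32::NAN);
    assert_eq!(_mm512_cmpord_ps_mask(ones, ones), 0xffff); // no NaN anywhere
    assert_eq!(_mm512_cmpunord_ps_mask(ones, nans), 0xffff); // every lane has a NaN
    assert_eq!(_mm512_cmpord_ps_mask(ones, nans), 0x0000);
}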
28798
28799/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
28800///
28801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
28802#[inline]
28803#[target_feature(enable = "avx512f")]
28804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28805#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28806pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28807    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
28808}
28809
28810/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28811///
28812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
28813#[inline]
28814#[target_feature(enable = "avx512f")]
28815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28816#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28817pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28818    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
28819}
28820
28821/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
28822///
28823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
28824#[inline]
28825#[target_feature(enable = "avx512f")]
28826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28827#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28828pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28829    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
28830}
28831
28832/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28833///
28834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
28835#[inline]
28836#[target_feature(enable = "avx512f")]
28837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28838#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28839pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28840    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
28841}
28842
28843/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
28844///
28845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
28846#[inline]
28847#[target_feature(enable = "avx512f")]
28848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28849#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28850pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28851    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
28852}
28853
28854/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28855///
28856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
28857#[inline]
28858#[target_feature(enable = "avx512f")]
28859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28860#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28861pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28862    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
28863}
28864
28865/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
28866///
28867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
28868#[inline]
28869#[target_feature(enable = "avx512f")]
28870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28871#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28872pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28873    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
28874}
28875
28876/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28877///
28878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
28879#[inline]
28880#[target_feature(enable = "avx512f")]
28881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28882#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28883pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28884    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
28885}
28886
28887/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
28888///
28889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
28890#[inline]
28891#[target_feature(enable = "avx512f")]
28892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28893#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28894pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28895    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
28896}
28897
28898/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28899///
28900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
28901#[inline]
28902#[target_feature(enable = "avx512f")]
28903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28904#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28905pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28906    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
28907}
28908
28909/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
28910///
28911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
28912#[inline]
28913#[target_feature(enable = "avx512f")]
28914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28915#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28916pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
28917    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
28918}
28919
28920/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28921///
28922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
28923#[inline]
28924#[target_feature(enable = "avx512f")]
28925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28926#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
28927pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
28928    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
28929}
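
// Illustrative sketch, not part of the upstream source: the equality compare
// uses an ordered predicate (`_CMP_EQ_OQ`, false on NaN) while the not-equal
// compare uses an unordered one (`_CMP_NEQ_UQ`, true on NaN), so the two
// masks are always complements. The helper name and values are invented.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_cmpeq_cmpneq_pd_nan() {
    let nan = _mm512_set1_pd(f64::NAN);
    let eq = _mm512_cmpeq_pd_mask(nan, nan); // NaN == NaN is false (ordered)
    let neq = _mm512_cmpneq_pd_mask(nan, nan); // NaN != NaN is true (unordered)
    assert_eq!(eq, 0x00);
    assert_eq!(neq, 0xff);
    assert_eq!(eq | neq, 0xff);
}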
28930
28931/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28932///
28933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
28934#[inline]
28935#[target_feature(enable = "avx512f")]
28936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28937#[rustc_legacy_const_generics(2)]
28938#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28939pub unsafe fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
28940    static_assert_uimm_bits!(IMM8, 5);
28941    let neg_one = -1;
28942    let a = a.as_f64x8();
28943    let b = b.as_f64x8();
28944    let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
28945    transmute(r)
28946}
28947
28948/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28949///
28950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
28951#[inline]
28952#[target_feature(enable = "avx512f")]
28953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28954#[rustc_legacy_const_generics(3)]
28955#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28956pub unsafe fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(
28957    k1: __mmask8,
28958    a: __m512d,
28959    b: __m512d,
28960) -> __mmask8 {
28961    static_assert_uimm_bits!(IMM8, 5);
28962    let a = a.as_f64x8();
28963    let b = b.as_f64x8();
28964    let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
28965    transmute(r)
28966}
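
// Illustrative sketch, not part of the upstream source: in the `mask_`
// variants the incoming mask `k1` gates the comparison, i.e. the returned
// mask is the bitwise AND of `k1` with the full comparison result. The helper
// name and values are invented.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_mask_cmp_pd_mask_gating() {
    let a = _mm512_set1_pd(1.0);
    let b = _mm512_set1_pd(2.0);
    // a < b holds in all eight lanes, but only lanes selected by k1 survive.
    let k1: __mmask8 = 0b0101_0011;
    let m = _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b);
    assert_eq!(m, k1);
}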
28967
28968/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
28969///
28970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
28971#[inline]
28972#[target_feature(enable = "avx512f,avx512vl")]
28973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28974#[rustc_legacy_const_generics(2)]
28975#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28976pub unsafe fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
28977    static_assert_uimm_bits!(IMM8, 5);
28978    let neg_one = -1;
28979    let a = a.as_f64x4();
28980    let b = b.as_f64x4();
28981    let r = vcmppd256(a, b, IMM8, neg_one);
28982    transmute(r)
28983}
28984
28985/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
28986///
28987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
28988#[inline]
28989#[target_feature(enable = "avx512f,avx512vl")]
28990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28991#[rustc_legacy_const_generics(3)]
28992#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
28993pub unsafe fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(
28994    k1: __mmask8,
28995    a: __m256d,
28996    b: __m256d,
28997) -> __mmask8 {
28998    static_assert_uimm_bits!(IMM8, 5);
28999    let a = a.as_f64x4();
29000    let b = b.as_f64x4();
29001    let r = vcmppd256(a, b, IMM8, k1 as i8);
29002    transmute(r)
29003}
29004
29005/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
29006///
29007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
29008#[inline]
29009#[target_feature(enable = "avx512f,avx512vl")]
29010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29011#[rustc_legacy_const_generics(2)]
29012#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29013pub unsafe fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
29014    static_assert_uimm_bits!(IMM8, 5);
29015    let neg_one = -1;
29016    let a = a.as_f64x2();
29017    let b = b.as_f64x2();
29018    let r = vcmppd128(a, b, IMM8, neg_one);
29019    transmute(r)
29020}
29021
29022/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29023///
29024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
29025#[inline]
29026#[target_feature(enable = "avx512f,avx512vl")]
29027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29028#[rustc_legacy_const_generics(3)]
29029#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29030pub unsafe fn _mm_mask_cmp_pd_mask<const IMM8: i32>(
29031    k1: __mmask8,
29032    a: __m128d,
29033    b: __m128d,
29034) -> __mmask8 {
29035    static_assert_uimm_bits!(IMM8, 5);
29036    let a = a.as_f64x2();
29037    let b = b.as_f64x2();
29038    let r = vcmppd128(a, b, IMM8, k1 as i8);
29039    transmute(r)
29040}
29041
29042/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
29043/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29044///
29045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
29046#[inline]
29047#[target_feature(enable = "avx512f")]
29048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29049#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29050#[rustc_legacy_const_generics(2, 3)]
29051pub unsafe fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
29052    a: __m512d,
29053    b: __m512d,
29054) -> __mmask8 {
29055    static_assert_uimm_bits!(IMM5, 5);
29056    static_assert_mantissas_sae!(SAE);
29057    let neg_one = -1;
29058    let a = a.as_f64x8();
29059    let b = b.as_f64x8();
29060    let r = vcmppd(a, b, IMM5, neg_one, SAE);
29061    transmute(r)
29062}
29063
29064/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
29065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29066///
29067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
29068#[inline]
29069#[target_feature(enable = "avx512f")]
29070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29071#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29072#[rustc_legacy_const_generics(3, 4)]
29073pub unsafe fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
29074    k1: __mmask8,
29075    a: __m512d,
29076    b: __m512d,
29077) -> __mmask8 {
29078    static_assert_uimm_bits!(IMM5, 5);
29079    static_assert_mantissas_sae!(SAE);
29080    let a = a.as_f64x8();
29081    let b = b.as_f64x8();
29082    let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
29083    transmute(r)
29084}
29085
29086/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
29087///
29088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
29089#[inline]
29090#[target_feature(enable = "avx512f")]
29091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29092#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29093pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
29094    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
29095}
29096
29097/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29098///
29099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
29100#[inline]
29101#[target_feature(enable = "avx512f")]
29102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29103#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29104pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
29105    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
29106}
29107
29108/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
29109///
29110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
29111#[inline]
29112#[target_feature(enable = "avx512f")]
29113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29114#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29115pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
29116    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
29117}
29118
29119/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29120///
29121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
29122#[inline]
29123#[target_feature(enable = "avx512f")]
29124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29125#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
29126pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
29127    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
29128}
29129
29130/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29136#[rustc_legacy_const_generics(2)]
29137#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29138pub unsafe fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
29139    static_assert_uimm_bits!(IMM8, 5);
29140    let neg_one = -1;
29141    let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
29142    transmute(r)
29143}
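
// Illustrative sketch, not part of the upstream source: the `ss` form only
// compares the lowest lane, so at most bit 0 of the returned mask can be set
// and the upper lanes of both inputs are ignored. The helper name and values
// are invented.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm_cmp_ss_mask_low_lane() {
    // The upper three lanes deliberately disagree with the predicate.
    let a = _mm_setr_ps(1.0, 9.0, 9.0, 9.0);
    let b = _mm_setr_ps(2.0, 0.0, 0.0, 0.0);
    let m = _mm_cmp_ss_mask::<_CMP_LT_OS>(a, b); // 1.0 < 2.0 in lane 0
    assert_eq!(m, 0b1);
}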
29144
29145/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
29146///
29147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
29148#[inline]
29149#[target_feature(enable = "avx512f")]
29150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29151#[rustc_legacy_const_generics(3)]
29152#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29153pub unsafe fn _mm_mask_cmp_ss_mask<const IMM8: i32>(
29154    k1: __mmask8,
29155    a: __m128,
29156    b: __m128,
29157) -> __mmask8 {
29158    static_assert_uimm_bits!(IMM8, 5);
29159    let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
29160    transmute(r)
29161}
29162
29163/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
29164/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29165///
29166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
29167#[inline]
29168#[target_feature(enable = "avx512f")]
29169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29170#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29171#[rustc_legacy_const_generics(2, 3)]
29172pub unsafe fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
29173    a: __m128,
29174    b: __m128,
29175) -> __mmask8 {
29176    static_assert_uimm_bits!(IMM5, 5);
29177    static_assert_mantissas_sae!(SAE);
29178    let neg_one = -1;
29179    let r = vcmpss(a, b, IMM5, neg_one, SAE);
29180    transmute(r)
29181}
29182
29183/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
29184/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29185///
29186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
29187#[inline]
29188#[target_feature(enable = "avx512f")]
29189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29190#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29191#[rustc_legacy_const_generics(3, 4)]
29192pub unsafe fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
29193    k1: __mmask8,
29194    a: __m128,
29195    b: __m128,
29196) -> __mmask8 {
29197    static_assert_uimm_bits!(IMM5, 5);
29198    static_assert_mantissas_sae!(SAE);
29199    let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
29200    transmute(r)
29201}
29202
29203/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
29204///
29205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
29206#[inline]
29207#[target_feature(enable = "avx512f")]
29208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29209#[rustc_legacy_const_generics(2)]
29210#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29211pub unsafe fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
29212    static_assert_uimm_bits!(IMM8, 5);
29213    let neg_one = -1;
29214    let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
29215    transmute(r)
29216}
29217
29218/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
29219///
29220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
29221#[inline]
29222#[target_feature(enable = "avx512f")]
29223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29224#[rustc_legacy_const_generics(3)]
29225#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
29226pub unsafe fn _mm_mask_cmp_sd_mask<const IMM8: i32>(
29227    k1: __mmask8,
29228    a: __m128d,
29229    b: __m128d,
29230) -> __mmask8 {
29231    static_assert_uimm_bits!(IMM8, 5);
29232    let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
29233    transmute(r)
29234}
29235
29236/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
29237/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29238///
29239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
29240#[inline]
29241#[target_feature(enable = "avx512f")]
29242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29243#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29244#[rustc_legacy_const_generics(2, 3)]
29245pub unsafe fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
29246    a: __m128d,
29247    b: __m128d,
29248) -> __mmask8 {
29249    static_assert_uimm_bits!(IMM5, 5);
29250    static_assert_mantissas_sae!(SAE);
29251    let neg_one = -1;
29252    let r = vcmpsd(a, b, IMM5, neg_one, SAE);
29253    transmute(r)
29254}
29255
29256/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
29257/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
29258///
29259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
29260#[inline]
29261#[target_feature(enable = "avx512f")]
29262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29263#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
29264#[rustc_legacy_const_generics(3, 4)]
29265pub unsafe fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
29266    k1: __mmask8,
29267    a: __m128d,
29268    b: __m128d,
29269) -> __mmask8 {
29270    static_assert_uimm_bits!(IMM5, 5);
29271    static_assert_mantissas_sae!(SAE);
29272    let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
29273    transmute(r)
29274}
29275
29276/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
29277///
29278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
29279#[inline]
29280#[target_feature(enable = "avx512f")]
29281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29282#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29283pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29284    simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
29285}
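
// Illustrative sketch, not part of the upstream source: the `epu32` family
// compares lanes as unsigned integers, so the all-ones bit pattern (-1 when
// written as a signed literal) is the largest value rather than the smallest.
// The signed counterpart `_mm512_cmplt_epi32_mask` is defined earlier in this
// module. The helper name and values are invented.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mm512_cmplt_epu32_unsigned() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(-1); // 0xFFFF_FFFF as an unsigned 32-bit value
    assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0xffff); // 1 < 0xFFFF_FFFF
    assert_eq!(_mm512_cmplt_epi32_mask(a, b), 0x0000); // 1 < -1 is false
}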
29286
29287/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29288///
29289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
29290#[inline]
29291#[target_feature(enable = "avx512f")]
29292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29293#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29294pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29295    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
29296}
29297
29298/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
29299///
29300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
29301#[inline]
29302#[target_feature(enable = "avx512f,avx512vl")]
29303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29304#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29305pub unsafe fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29306    simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8()))
29307}
29308
29309/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29310///
29311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
29312#[inline]
29313#[target_feature(enable = "avx512f,avx512vl")]
29314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29315#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29316pub unsafe fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29317    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
29318}
29319
29320/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
29321///
29322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
29323#[inline]
29324#[target_feature(enable = "avx512f,avx512vl")]
29325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29326#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29327pub unsafe fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29328    simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4()))
29329}
29330
29331/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29332///
29333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
29334#[inline]
29335#[target_feature(enable = "avx512f,avx512vl")]
29336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29337#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29338pub unsafe fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29339    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
29340}
29341
29342/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29343///
29344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
29345#[inline]
29346#[target_feature(enable = "avx512f")]
29347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29348#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29349pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29350    simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
29351}
29352
29353/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29354///
29355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
29356#[inline]
29357#[target_feature(enable = "avx512f")]
29358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29359#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29360pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29361    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
29362}
29363
29364/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29365///
29366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
29367#[inline]
29368#[target_feature(enable = "avx512f,avx512vl")]
29369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29370#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29371pub unsafe fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29372    simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8()))
29373}
29374
29375/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29376///
29377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
29378#[inline]
29379#[target_feature(enable = "avx512f,avx512vl")]
29380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29381#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29382pub unsafe fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29383    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
29384}
29385
29386/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29387///
29388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
29389#[inline]
29390#[target_feature(enable = "avx512f,avx512vl")]
29391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29392#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29393pub unsafe fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29394    simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4()))
29395}
29396
29397/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29398///
29399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
29400#[inline]
29401#[target_feature(enable = "avx512f,avx512vl")]
29402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29403#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29404pub unsafe fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29405    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
29406}
29407
29408/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29409///
29410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
29411#[inline]
29412#[target_feature(enable = "avx512f")]
29413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29414#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29415pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29416    simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
29417}
29418
29419/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29420///
29421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
29422#[inline]
29423#[target_feature(enable = "avx512f")]
29424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29425#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29426pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29427    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
29428}
29429
29430/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29431///
29432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
29433#[inline]
29434#[target_feature(enable = "avx512f,avx512vl")]
29435#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29436#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29437pub unsafe fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29438    simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8()))
29439}
29440
29441/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29442///
29443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
29444#[inline]
29445#[target_feature(enable = "avx512f,avx512vl")]
29446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29447#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29448pub unsafe fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29449    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
29450}
29451
29452/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29453///
29454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
29455#[inline]
29456#[target_feature(enable = "avx512f,avx512vl")]
29457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29458#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29459pub unsafe fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29460    simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4()))
29461}
29462
29463/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29464///
29465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
29466#[inline]
29467#[target_feature(enable = "avx512f,avx512vl")]
29468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29469#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29470pub unsafe fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29471    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
29472}
29473
29474/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29475///
29476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
29477#[inline]
29478#[target_feature(enable = "avx512f")]
29479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29480#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29481pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29482    simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
29483}
29484
29485/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29486///
29487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
29488#[inline]
29489#[target_feature(enable = "avx512f")]
29490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29491#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29492pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29493    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
29494}
29495
29496/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29497///
29498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
29499#[inline]
29500#[target_feature(enable = "avx512f,avx512vl")]
29501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29502#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29503pub unsafe fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29504    simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8()))
29505}
29506
29507/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29508///
29509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
29510#[inline]
29511#[target_feature(enable = "avx512f,avx512vl")]
29512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29513#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29514pub unsafe fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29515    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
29516}
29517
29518/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
29519///
29520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
29521#[inline]
29522#[target_feature(enable = "avx512f,avx512vl")]
29523#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29524#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29525pub unsafe fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29526    simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4()))
29527}
29528
29529/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29530///
29531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
29532#[inline]
29533#[target_feature(enable = "avx512f,avx512vl")]
29534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29535#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29536pub unsafe fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29537    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
29538}
29539
29540/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
29541///
29542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
29543#[inline]
29544#[target_feature(enable = "avx512f")]
29545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29546#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29547pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29548    simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
29549}
29550
29551/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29552///
29553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
29554#[inline]
29555#[target_feature(enable = "avx512f")]
29556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29557#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29558pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29559    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
29560}
29561
29562/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
29563///
29564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
29565#[inline]
29566#[target_feature(enable = "avx512f,avx512vl")]
29567#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29568#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29569pub unsafe fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29570    simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8()))
29571}
29572
29573/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29574///
29575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
29576#[inline]
29577#[target_feature(enable = "avx512f,avx512vl")]
29578#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29579#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29580pub unsafe fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29581    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
29582}
29583
29584/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
29585///
29586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
29587#[inline]
29588#[target_feature(enable = "avx512f,avx512vl")]
29589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29590#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29591pub unsafe fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29592    simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4()))
29593}
29594
29595/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29596///
29597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
29598#[inline]
29599#[target_feature(enable = "avx512f,avx512vl")]
29600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29601#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29602pub unsafe fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29603    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
29604}
29605
29606/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
29607///
29608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
29609#[inline]
29610#[target_feature(enable = "avx512f")]
29611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29612#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29613pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29614    simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
29615}
29616
29617/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29618///
29619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
29620#[inline]
29621#[target_feature(enable = "avx512f")]
29622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29623#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29624pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29625    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
29626}
29627
29628/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
29629///
29630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
29631#[inline]
29632#[target_feature(enable = "avx512f,avx512vl")]
29633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29634#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29635pub unsafe fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29636    simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8()))
29637}
29638
29639/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29640///
29641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
29642#[inline]
29643#[target_feature(enable = "avx512f,avx512vl")]
29644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29645#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29646pub unsafe fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29647    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
29648}
29649
29650/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
29651///
29652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
29653#[inline]
29654#[target_feature(enable = "avx512f,avx512vl")]
29655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29656#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29657pub unsafe fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29658    simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4()))
29659}
29660
29661/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29662///
29663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
29664#[inline]
29665#[target_feature(enable = "avx512f,avx512vl")]
29666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29667#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
29668pub unsafe fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29669    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
29670}
29671
29672/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
29673///
29674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
29675#[inline]
29676#[target_feature(enable = "avx512f")]
29677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29678#[rustc_legacy_const_generics(2)]
29679#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29680pub unsafe fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29681    a: __m512i,
29682    b: __m512i,
29683) -> __mmask16 {
29684    static_assert_uimm_bits!(IMM3, 3);
29685    let a = a.as_u32x16();
29686    let b = b.as_u32x16();
29687    let r = match IMM3 {
29688        0 => simd_eq(a, b),
29689        1 => simd_lt(a, b),
29690        2 => simd_le(a, b),
29691        3 => i32x16::ZERO,
29692        4 => simd_ne(a, b),
29693        5 => simd_ge(a, b),
29694        6 => simd_gt(a, b),
29695        _ => i32x16::splat(-1),
29696    };
29697    simd_bitmask(r)
29698}
29699
29700/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29701///
29702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
29703#[inline]
29704#[target_feature(enable = "avx512f")]
29705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29706#[rustc_legacy_const_generics(3)]
29707#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29708pub unsafe fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29709    k1: __mmask16,
29710    a: __m512i,
29711    b: __m512i,
29712) -> __mmask16 {
29713    static_assert_uimm_bits!(IMM3, 3);
29714    let a = a.as_u32x16();
29715    let b = b.as_u32x16();
29716    let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
29717    let r = match IMM3 {
29718        0 => simd_and(k1, simd_eq(a, b)),
29719        1 => simd_and(k1, simd_lt(a, b)),
29720        2 => simd_and(k1, simd_le(a, b)),
29721        3 => i32x16::ZERO,
29722        4 => simd_and(k1, simd_ne(a, b)),
29723        5 => simd_and(k1, simd_ge(a, b)),
29724        6 => simd_and(k1, simd_gt(a, b)),
29725        _ => k1,
29726    };
29727    simd_bitmask(r)
29728}
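
// A brief sketch of the zeromask semantics implemented above (hypothetical test module,
// following the crate's `simd_test` convention): the returned mask is the bitwise AND of `k1`
// and the unmasked comparison, so lanes whose `k1` bit is clear never set a result bit.
#[cfg(test)]
mod mask_cmp_epu32_zeromask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn zeromask_clears_unselected_lanes() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(2);
        // Unmasked, all 16 lanes compare less-than.
        assert_eq!(_mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b), 0xFFFF);
        // With only the low byte of `k1` set, only those lanes can report a match.
        assert_eq!(
            _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(0x00FF, a, b),
            0x00FF
        );
    }
}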
29729
29730/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
29731///
29732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
29733#[inline]
29734#[target_feature(enable = "avx512f,avx512vl")]
29735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29736#[rustc_legacy_const_generics(2)]
29737#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29738pub unsafe fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29739    a: __m256i,
29740    b: __m256i,
29741) -> __mmask8 {
29742    static_assert_uimm_bits!(IMM3, 3);
29743    let a = a.as_u32x8();
29744    let b = b.as_u32x8();
29745    let r = match IMM3 {
29746        0 => simd_eq(a, b),
29747        1 => simd_lt(a, b),
29748        2 => simd_le(a, b),
29749        3 => i32x8::ZERO,
29750        4 => simd_ne(a, b),
29751        5 => simd_ge(a, b),
29752        6 => simd_gt(a, b),
29753        _ => i32x8::splat(-1),
29754    };
29755    simd_bitmask(r)
29756}
29757
29758/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29759///
29760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
29761#[inline]
29762#[target_feature(enable = "avx512f,avx512vl")]
29763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29764#[rustc_legacy_const_generics(3)]
29765#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29766pub unsafe fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29767    k1: __mmask8,
29768    a: __m256i,
29769    b: __m256i,
29770) -> __mmask8 {
29771    static_assert_uimm_bits!(IMM3, 3);
29772    let a = a.as_u32x8();
29773    let b = b.as_u32x8();
29774    let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
29775    let r = match IMM3 {
29776        0 => simd_and(k1, simd_eq(a, b)),
29777        1 => simd_and(k1, simd_lt(a, b)),
29778        2 => simd_and(k1, simd_le(a, b)),
29779        3 => i32x8::ZERO,
29780        4 => simd_and(k1, simd_ne(a, b)),
29781        5 => simd_and(k1, simd_ge(a, b)),
29782        6 => simd_and(k1, simd_gt(a, b)),
29783        _ => k1,
29784    };
29785    simd_bitmask(r)
29786}
29787
29788/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
29789///
29790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
29791#[inline]
29792#[target_feature(enable = "avx512f,avx512vl")]
29793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29794#[rustc_legacy_const_generics(2)]
29795#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29796pub unsafe fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
29797    static_assert_uimm_bits!(IMM3, 3);
29798    let a = a.as_u32x4();
29799    let b = b.as_u32x4();
29800    let r = match IMM3 {
29801        0 => simd_eq(a, b),
29802        1 => simd_lt(a, b),
29803        2 => simd_le(a, b),
29804        3 => i32x4::ZERO,
29805        4 => simd_ne(a, b),
29806        5 => simd_ge(a, b),
29807        6 => simd_gt(a, b),
29808        _ => i32x4::splat(-1),
29809    };
29810    simd_bitmask(r)
29811}
29812
29813/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29814///
29815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
29816#[inline]
29817#[target_feature(enable = "avx512f,avx512vl")]
29818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29819#[rustc_legacy_const_generics(3)]
29820#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
29821pub unsafe fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
29822    k1: __mmask8,
29823    a: __m128i,
29824    b: __m128i,
29825) -> __mmask8 {
29826    static_assert_uimm_bits!(IMM3, 3);
29827    let a = a.as_u32x4();
29828    let b = b.as_u32x4();
29829    let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
29830    let r = match IMM3 {
29831        0 => simd_and(k1, simd_eq(a, b)),
29832        1 => simd_and(k1, simd_lt(a, b)),
29833        2 => simd_and(k1, simd_le(a, b)),
29834        3 => i32x4::ZERO,
29835        4 => simd_and(k1, simd_ne(a, b)),
29836        5 => simd_and(k1, simd_ge(a, b)),
29837        6 => simd_and(k1, simd_gt(a, b)),
29838        _ => k1,
29839    };
29840    simd_bitmask(r)
29841}
29842
29843/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
29844///
29845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
29846#[inline]
29847#[target_feature(enable = "avx512f")]
29848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29849#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29850pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29851    simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
29852}
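
// A small sketch (hypothetical test module) contrasting the signed compare above with its
// unsigned counterpart `_mm512_cmplt_epu32_mask`: a lane holding `-1` is less than `0` when
// read as `i32`, but becomes `u32::MAX` under the unsigned comparison.
#[cfg(test)]
mod cmplt_epi32_signedness_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn signed_vs_unsigned_less_than() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(0);
        // Signed: -1 < 0 in every lane.
        assert_eq!(_mm512_cmplt_epi32_mask(a, b), 0xFFFF);
        // Unsigned: the same bits read as u32::MAX, which is not below 0.
        assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);
    }
}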
29853
29854/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29855///
29856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
29857#[inline]
29858#[target_feature(enable = "avx512f")]
29859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29860#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29861pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29862    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
29863}
29864
29865/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
29866///
29867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
29868#[inline]
29869#[target_feature(enable = "avx512f,avx512vl")]
29870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29871#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29872pub unsafe fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29873    simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8()))
29874}
29875
29876/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29877///
29878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
29879#[inline]
29880#[target_feature(enable = "avx512f,avx512vl")]
29881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29882#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29883pub unsafe fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29884    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
29885}
29886
29887/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
29888///
29889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
29890#[inline]
29891#[target_feature(enable = "avx512f,avx512vl")]
29892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29893#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29894pub unsafe fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29895    simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
29896}
29897
29898/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29899///
29900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
29901#[inline]
29902#[target_feature(enable = "avx512f,avx512vl")]
29903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29904#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29905pub unsafe fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29906    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
29907}
29908
29909/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29910///
29911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
29912#[inline]
29913#[target_feature(enable = "avx512f")]
29914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29916pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29917    simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
29918}
29919
29920/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29921///
29922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
29923#[inline]
29924#[target_feature(enable = "avx512f")]
29925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29926#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29927pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29928    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
29929}
29930
29931/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29932///
29933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
29934#[inline]
29935#[target_feature(enable = "avx512f,avx512vl")]
29936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29937#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29938pub unsafe fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29939    simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
29940}
29941
29942/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29943///
29944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
29945#[inline]
29946#[target_feature(enable = "avx512f,avx512vl")]
29947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29948#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29949pub unsafe fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29950    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
29951}
29952
29953/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
29954///
29955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
29956#[inline]
29957#[target_feature(enable = "avx512f,avx512vl")]
29958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29959#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29960pub unsafe fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29961    simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
29962}
29963
29964/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29965///
29966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
29967#[inline]
29968#[target_feature(enable = "avx512f,avx512vl")]
29969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29970#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29971pub unsafe fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29972    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
29973}
29974
29975/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29976///
29977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
29978#[inline]
29979#[target_feature(enable = "avx512f")]
29980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29981#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29982pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29983    simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
29984}
29985
29986/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
29987///
29988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
29989#[inline]
29990#[target_feature(enable = "avx512f")]
29991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29992#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
29993pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29994    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
29995}
29996
29997/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
29998///
29999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
30000#[inline]
30001#[target_feature(enable = "avx512f,avx512vl")]
30002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30003#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30004pub unsafe fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30005    simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8()))
30006}
30007
30008/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30009///
30010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
30011#[inline]
30012#[target_feature(enable = "avx512f,avx512vl")]
30013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30014#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30015pub unsafe fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30016    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
30017}
30018
30019/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30020///
30021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
30022#[inline]
30023#[target_feature(enable = "avx512f,avx512vl")]
30024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30025#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30026pub unsafe fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30027    simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4()))
30028}
30029
30030/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30031///
30032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
30033#[inline]
30034#[target_feature(enable = "avx512f,avx512vl")]
30035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30036#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30037pub unsafe fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30038    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
30039}
30040
30041/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30042///
30043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
30044#[inline]
30045#[target_feature(enable = "avx512f")]
30046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30048pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30049    simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
30050}
30051
30052/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30053///
30054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
30055#[inline]
30056#[target_feature(enable = "avx512f")]
30057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30058#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30059pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30060    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
30061}
30062
30063/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30064///
30065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
30066#[inline]
30067#[target_feature(enable = "avx512f,avx512vl")]
30068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30069#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30070pub unsafe fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30071    simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8()))
30072}
30073
30074/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30075///
30076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
30077#[inline]
30078#[target_feature(enable = "avx512f,avx512vl")]
30079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30080#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30081pub unsafe fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30082    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
30083}
30084
30085/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30086///
30087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
30088#[inline]
30089#[target_feature(enable = "avx512f,avx512vl")]
30090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30091#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30092pub unsafe fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30093    simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4()))
30094}
30095
30096/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30097///
30098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
30099#[inline]
30100#[target_feature(enable = "avx512f,avx512vl")]
30101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30102#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30103pub unsafe fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30104    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
30105}
30106
30107/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
30108///
30109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
30110#[inline]
30111#[target_feature(enable = "avx512f")]
30112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30113#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30114pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30115    simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
30116}
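
// A short sketch (hypothetical test module) of the mask-bit layout produced above: lane `j`
// of the comparison sets bit `j` of the returned `__mmask16`.
#[cfg(test)]
mod cmpeq_epi32_bit_order_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn lane_index_maps_to_mask_bit() {
        // `_mm512_setr_epi32` places its first argument in lane 0.
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_set1_epi32(3);
        // Only lane 3 is equal, so only bit 3 of the mask is set.
        assert_eq!(_mm512_cmpeq_epi32_mask(a, b), 1 << 3);
    }
}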
30117
30118/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30119///
30120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
30121#[inline]
30122#[target_feature(enable = "avx512f")]
30123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30124#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30125pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30126    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
30127}
30128
30129/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
30130///
30131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
30132#[inline]
30133#[target_feature(enable = "avx512f,avx512vl")]
30134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30135#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30136pub unsafe fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30137    simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
30138}
30139
30140/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30141///
30142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
30143#[inline]
30144#[target_feature(enable = "avx512f,avx512vl")]
30145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30146#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30147pub unsafe fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30148    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
30149}
30150
30151/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
30152///
30153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
30154#[inline]
30155#[target_feature(enable = "avx512f,avx512vl")]
30156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30158pub unsafe fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30159    simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
30160}
30161
30162/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30163///
30164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
30165#[inline]
30166#[target_feature(enable = "avx512f,avx512vl")]
30167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30168#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30169pub unsafe fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30170    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
30171}
30172
30173/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
30174///
30175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
30176#[inline]
30177#[target_feature(enable = "avx512f")]
30178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30180pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30181    simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
30182}
30183
30184/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30185///
30186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
30187#[inline]
30188#[target_feature(enable = "avx512f")]
30189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30190#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30191pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30192    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
30193}
30194
30195/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
30196///
30197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
30198#[inline]
30199#[target_feature(enable = "avx512f,avx512vl")]
30200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30201#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30202pub unsafe fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30203    simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8()))
30204}
30205
30206/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30207///
30208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
30209#[inline]
30210#[target_feature(enable = "avx512f,avx512vl")]
30211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30212#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30213pub unsafe fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30214    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
30215}
30216
30217/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
30218///
30219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
30220#[inline]
30221#[target_feature(enable = "avx512f,avx512vl")]
30222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30223#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30224pub unsafe fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
30225    simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4()))
30226}
30227
30228/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30229///
30230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
30231#[inline]
30232#[target_feature(enable = "avx512f,avx512vl")]
30233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30234#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
30235pub unsafe fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30236    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
30237}
30238
30239/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30240///
30241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
30242#[inline]
30243#[target_feature(enable = "avx512f")]
30244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30245#[rustc_legacy_const_generics(2)]
30246#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30247pub unsafe fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30248    a: __m512i,
30249    b: __m512i,
30250) -> __mmask16 {
30251    static_assert_uimm_bits!(IMM3, 3);
30252    let a = a.as_i32x16();
30253    let b = b.as_i32x16();
30254    let r = match IMM3 {
30255        0 => simd_eq(a, b),
30256        1 => simd_lt(a, b),
30257        2 => simd_le(a, b),
30258        3 => i32x16::ZERO,
30259        4 => simd_ne(a, b),
30260        5 => simd_ge(a, b),
30261        6 => simd_gt(a, b),
30262        _ => i32x16::splat(-1),
30263    };
30264    simd_bitmask(r)
30265}
30266
30267/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30268///
30269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
30270#[inline]
30271#[target_feature(enable = "avx512f")]
30272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30273#[rustc_legacy_const_generics(3)]
30274#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30275pub unsafe fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30276    k1: __mmask16,
30277    a: __m512i,
30278    b: __m512i,
30279) -> __mmask16 {
30280    static_assert_uimm_bits!(IMM3, 3);
30281    let a = a.as_i32x16();
30282    let b = b.as_i32x16();
30283    let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
30284    let r = match IMM3 {
30285        0 => simd_and(k1, simd_eq(a, b)),
30286        1 => simd_and(k1, simd_lt(a, b)),
30287        2 => simd_and(k1, simd_le(a, b)),
30288        3 => i32x16::ZERO,
30289        4 => simd_and(k1, simd_ne(a, b)),
30290        5 => simd_and(k1, simd_ge(a, b)),
30291        6 => simd_and(k1, simd_gt(a, b)),
30292        _ => k1,
30293    };
30294    simd_bitmask(r)
30295}
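
// A minimal sketch (hypothetical test module, assuming the usual `_MM_CMPINT_TRUE`/`_MM_CMPINT_FALSE`
// constants) of the `_ => k1` arm above: under a zeromask, the always-true predicate simply
// returns `k1` and the always-false predicate returns an empty mask, matching Intel's
// `k[j] := k1[j] ? (a OP b) : 0` definition.
#[cfg(test)]
mod mask_cmp_epi32_true_false_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn true_and_false_predicates_under_mask() {
        let a = _mm512_set1_epi32(42);
        let b = _mm512_set1_epi32(7);
        let k1: __mmask16 = 0b1010_1010_1010_1010;
        assert_eq!(_mm512_mask_cmp_epi32_mask::<_MM_CMPINT_TRUE>(k1, a, b), k1);
        assert_eq!(_mm512_mask_cmp_epi32_mask::<_MM_CMPINT_FALSE>(k1, a, b), 0);
    }
}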
30296
30297/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30298///
30299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
30300#[inline]
30301#[target_feature(enable = "avx512f,avx512vl")]
30302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30303#[rustc_legacy_const_generics(2)]
30304#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30305pub unsafe fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30306    a: __m256i,
30307    b: __m256i,
30308) -> __mmask8 {
30309    static_assert_uimm_bits!(IMM3, 3);
30310    let a = a.as_i32x8();
30311    let b = b.as_i32x8();
30312    let r = match IMM3 {
30313        0 => simd_eq(a, b),
30314        1 => simd_lt(a, b),
30315        2 => simd_le(a, b),
30316        3 => i32x8::ZERO,
30317        4 => simd_ne(a, b),
30318        5 => simd_ge(a, b),
30319        6 => simd_gt(a, b),
30320        _ => i32x8::splat(-1),
30321    };
30322    simd_bitmask(r)
30323}
30324
30325/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30326///
30327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
30328#[inline]
30329#[target_feature(enable = "avx512f,avx512vl")]
30330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30331#[rustc_legacy_const_generics(3)]
30332#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30333pub unsafe fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30334    k1: __mmask8,
30335    a: __m256i,
30336    b: __m256i,
30337) -> __mmask8 {
30338    static_assert_uimm_bits!(IMM3, 3);
30339    let a = a.as_i32x8();
30340    let b = b.as_i32x8();
30341    let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
30342    let r = match IMM3 {
30343        0 => simd_and(k1, simd_eq(a, b)),
30344        1 => simd_and(k1, simd_lt(a, b)),
30345        2 => simd_and(k1, simd_le(a, b)),
30346        3 => i32x8::ZERO,
30347        4 => simd_and(k1, simd_ne(a, b)),
30348        5 => simd_and(k1, simd_ge(a, b)),
30349        6 => simd_and(k1, simd_gt(a, b)),
30350        _ => k1,
30351    };
30352    simd_bitmask(r)
30353}
30354
30355/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30356///
30357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
30358#[inline]
30359#[target_feature(enable = "avx512f,avx512vl")]
30360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30361#[rustc_legacy_const_generics(2)]
30362#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30363pub unsafe fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
30364    static_assert_uimm_bits!(IMM3, 3);
30365    let a = a.as_i32x4();
30366    let b = b.as_i32x4();
30367    let r = match IMM3 {
30368        0 => simd_eq(a, b),
30369        1 => simd_lt(a, b),
30370        2 => simd_le(a, b),
30371        3 => i32x4::ZERO,
30372        4 => simd_ne(a, b),
30373        5 => simd_ge(a, b),
30374        6 => simd_gt(a, b),
30375        _ => i32x4::splat(-1),
30376    };
30377    simd_bitmask(r)
30378}
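
// A quick sketch (hypothetical test module, requiring AVX-512VL): a 128-bit compare has only
// four lanes, so at most the low four bits of the returned `__mmask8` can ever be set.
#[cfg(test)]
mod cmp_epi32_128_mask_width_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn only_low_four_bits_are_used() {
        let a = _mm_set1_epi32(5);
        let b = _mm_set1_epi32(5);
        // All four lanes are equal; the upper four mask bits stay zero.
        assert_eq!(_mm_cmp_epi32_mask::<_MM_CMPINT_EQ>(a, b), 0b0000_1111);
    }
}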
30379
30380/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30381///
30382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
30383#[inline]
30384#[target_feature(enable = "avx512f,avx512vl")]
30385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30386#[rustc_legacy_const_generics(3)]
30387#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30388pub unsafe fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
30389    k1: __mmask8,
30390    a: __m128i,
30391    b: __m128i,
30392) -> __mmask8 {
30393    static_assert_uimm_bits!(IMM3, 3);
30394    let a = a.as_i32x4();
30395    let b = b.as_i32x4();
30396    let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
30397    let r = match IMM3 {
30398        0 => simd_and(k1, simd_eq(a, b)),
30399        1 => simd_and(k1, simd_lt(a, b)),
30400        2 => simd_and(k1, simd_le(a, b)),
30401        3 => i32x4::ZERO,
30402        4 => simd_and(k1, simd_ne(a, b)),
30403        5 => simd_and(k1, simd_ge(a, b)),
30404        6 => simd_and(k1, simd_gt(a, b)),
30405        _ => k1,
30406    };
30407    simd_bitmask(r)
30408}
30409
30410/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
30411///
30412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
30413#[inline]
30414#[target_feature(enable = "avx512f")]
30415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30416#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30417pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30418    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
30419}
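
// A compact sketch (hypothetical test module): the 64-bit unsigned compares operate on eight
// lanes, so they return a `__mmask8`; an all-ones bit pattern reads as `u64::MAX` here rather
// than `-1`.
#[cfg(test)]
mod cmplt_epu64_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn unsigned_64_bit_less_than() {
        let max = _mm512_set1_epi64(-1); // every lane is u64::MAX when viewed unsigned
        let zero = _mm512_set1_epi64(0);
        assert_eq!(_mm512_cmplt_epu64_mask(zero, max), 0xFF);
        assert_eq!(_mm512_cmplt_epu64_mask(max, zero), 0x00);
    }
}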
30420
30421/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30427#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30428pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30429    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
30430}
30431
30432/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
30435#[inline]
30436#[target_feature(enable = "avx512f,avx512vl")]
30437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30438#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30439pub unsafe fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30440    simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4()))
30441}
30442
30443/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
30446#[inline]
30447#[target_feature(enable = "avx512f,avx512vl")]
30448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30449#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30450pub unsafe fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30451    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
30452}
30453
30454/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
30455///
30456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
30457#[inline]
30458#[target_feature(enable = "avx512f,avx512vl")]
30459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30460#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30461pub unsafe fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30462    simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2()))
30463}
30464
30465/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30466///
30467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
30468#[inline]
30469#[target_feature(enable = "avx512f,avx512vl")]
30470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30471#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30472pub unsafe fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30473    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
30474}
30475
30476/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30477///
30478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
30479#[inline]
30480#[target_feature(enable = "avx512f")]
30481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30482#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30483pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30484    simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
30485}
30486
30487/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30488///
30489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
30490#[inline]
30491#[target_feature(enable = "avx512f")]
30492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30493#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30494pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30495    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
30496}
30497
30498/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30499///
30500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
30501#[inline]
30502#[target_feature(enable = "avx512f,avx512vl")]
30503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30504#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30505pub unsafe fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30506    simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4()))
30507}
30508
30509/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30510///
30511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
30512#[inline]
30513#[target_feature(enable = "avx512f,avx512vl")]
30514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30515#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30516pub unsafe fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30517    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
30518}
30519
30520/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
30521///
30522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
30523#[inline]
30524#[target_feature(enable = "avx512f,avx512vl")]
30525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30526#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30527pub unsafe fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30528    simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2()))
30529}
30530
30531/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30532///
30533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
30534#[inline]
30535#[target_feature(enable = "avx512f,avx512vl")]
30536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30537#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30538pub unsafe fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30539    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
30540}
30541
30542/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30543///
30544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
30545#[inline]
30546#[target_feature(enable = "avx512f")]
30547#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30548#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30549pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30550    simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
30551}
30552
30553/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30554///
30555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
30556#[inline]
30557#[target_feature(enable = "avx512f")]
30558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30559#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30560pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30561    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
30562}
30563
30564/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30565///
30566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
30567#[inline]
30568#[target_feature(enable = "avx512f,avx512vl")]
30569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30570#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30571pub unsafe fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30572    simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4()))
30573}
30574
30575/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30576///
30577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
30578#[inline]
30579#[target_feature(enable = "avx512f,avx512vl")]
30580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30581#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30582pub unsafe fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30583    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
30584}
30585
30586/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
30587///
30588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
30589#[inline]
30590#[target_feature(enable = "avx512f,avx512vl")]
30591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30592#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30593pub unsafe fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30594    simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2()))
30595}
30596
30597/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30598///
30599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
30600#[inline]
30601#[target_feature(enable = "avx512f,avx512vl")]
30602#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30603#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30604pub unsafe fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30605    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
30606}
30607
30608/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30609///
30610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
30611#[inline]
30612#[target_feature(enable = "avx512f")]
30613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30614#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30615pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30616    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
30617}
30618
30619/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30620///
30621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
30622#[inline]
30623#[target_feature(enable = "avx512f")]
30624#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30625#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30626pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30627    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
30628}
30629
30630/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30631///
30632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
30633#[inline]
30634#[target_feature(enable = "avx512f,avx512vl")]
30635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30636#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30637pub unsafe fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30638    simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4()))
30639}
30640
30641/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30642///
30643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
30644#[inline]
30645#[target_feature(enable = "avx512f,avx512vl")]
30646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30647#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30648pub unsafe fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30649    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
30650}
30651
30652/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
30653///
30654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
30655#[inline]
30656#[target_feature(enable = "avx512f,avx512vl")]
30657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30658#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30659pub unsafe fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30660    simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2()))
30661}
30662
30663/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30664///
30665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
30666#[inline]
30667#[target_feature(enable = "avx512f,avx512vl")]
30668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30669#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30670pub unsafe fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30671    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
30672}
30673
30674/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
30675///
30676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
30677#[inline]
30678#[target_feature(enable = "avx512f")]
30679#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30680#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30681pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30682    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
30683}
30684
30685/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30686///
30687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
30688#[inline]
30689#[target_feature(enable = "avx512f")]
30690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30691#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30692pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30693    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
30694}
30695
30696/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
30697///
30698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
30699#[inline]
30700#[target_feature(enable = "avx512f,avx512vl")]
30701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30702#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30703pub unsafe fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30704    simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4()))
30705}
30706
30707/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30708///
30709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
30710#[inline]
30711#[target_feature(enable = "avx512f,avx512vl")]
30712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30713#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30714pub unsafe fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30715    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
30716}
30717
30718/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
30719///
30720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
30721#[inline]
30722#[target_feature(enable = "avx512f,avx512vl")]
30723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30724#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30725pub unsafe fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30726    simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2()))
30727}
30728
30729/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30730///
30731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
30732#[inline]
30733#[target_feature(enable = "avx512f,avx512vl")]
30734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30735#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30736pub unsafe fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30737    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
30738}
30739
30740/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
30741///
30742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
30743#[inline]
30744#[target_feature(enable = "avx512f")]
30745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30746#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30747pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30748    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
30749}
30750
30751/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30752///
30753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
30754#[inline]
30755#[target_feature(enable = "avx512f")]
30756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30757#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30758pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30759    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
30760}
30761
30762/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
30763///
30764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
30765#[inline]
30766#[target_feature(enable = "avx512f,avx512vl")]
30767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30768#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30769pub unsafe fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
30770    simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4()))
30771}
30772
30773/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30774///
30775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
30776#[inline]
30777#[target_feature(enable = "avx512f,avx512vl")]
30778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30779#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30780pub unsafe fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30781    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
30782}
30783
30784/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
30785///
30786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
30787#[inline]
30788#[target_feature(enable = "avx512f,avx512vl")]
30789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30790#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30791pub unsafe fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
30792    simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2()))
30793}
30794
30795/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30796///
30797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
30798#[inline]
30799#[target_feature(enable = "avx512f,avx512vl")]
30800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30801#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
30802pub unsafe fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30803    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
30804}
30805
30806/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30807///
30808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
30809#[inline]
30810#[target_feature(enable = "avx512f")]
30811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30812#[rustc_legacy_const_generics(2)]
30813#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30814pub unsafe fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30815    a: __m512i,
30816    b: __m512i,
30817) -> __mmask8 {
30818    static_assert_uimm_bits!(IMM3, 3);
30819    let a = a.as_u64x8();
30820    let b = b.as_u64x8();
30821    let r = match IMM3 {
30822        0 => simd_eq(a, b),
30823        1 => simd_lt(a, b),
30824        2 => simd_le(a, b),
30825        3 => i64x8::ZERO,
30826        4 => simd_ne(a, b),
30827        5 => simd_ge(a, b),
30828        6 => simd_gt(a, b),
30829        _ => i64x8::splat(-1),
30830    };
30831    simd_bitmask(r)
30832}
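
// Illustrative sketch (not from the upstream source): the IMM3 predicate picks
// which lane comparison is materialised, with _MM_CMPINT_FALSE (3) always
// producing an empty mask and _MM_CMPINT_TRUE (7) a full one. The helper name
// and inputs are assumptions; requires an AVX512F CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cmp_epu64_predicates() {
    let a = _mm512_set1_epi64(3);
    let b = _mm512_set1_epi64(5);
    // 3 < 5 holds in every lane.
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b), 0xff);
    // "Not less than" is the >= predicate; no lane satisfies it here.
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_NLT>(a, b), 0x00);
    // The constant predicates ignore the operands entirely.
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_FALSE>(a, b), 0x00);
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_TRUE>(a, b), 0xff);
}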
30833
30834/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30835///
30836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
30837#[inline]
30838#[target_feature(enable = "avx512f")]
30839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30840#[rustc_legacy_const_generics(3)]
30841#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30842pub unsafe fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30843    k1: __mmask8,
30844    a: __m512i,
30845    b: __m512i,
30846) -> __mmask8 {
30847    static_assert_uimm_bits!(IMM3, 3);
30848    let a = a.as_u64x8();
30849    let b = b.as_u64x8();
30850    let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
30851    let r = match IMM3 {
30852        0 => simd_and(k1, simd_eq(a, b)),
30853        1 => simd_and(k1, simd_lt(a, b)),
30854        2 => simd_and(k1, simd_le(a, b)),
30855        3 => i64x8::ZERO,
30856        4 => simd_and(k1, simd_ne(a, b)),
30857        5 => simd_and(k1, simd_ge(a, b)),
30858        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1, // _MM_CMPINT_TRUE still respects the zeromask
30860    };
30861    simd_bitmask(r)
30862}
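
// Illustrative sketch (not from the upstream source): the masked form ANDs the
// comparison result with k1, so lanes whose k1 bit is clear always report 0.
// Helper name and inputs are assumptions; requires an AVX512F CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cmp_epu64_zeromask() {
    let a = _mm512_set1_epi64(7);
    // Every lane compares equal, but only the low four lanes are active in k1.
    assert_eq!(
        _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(0b0000_1111, a, a),
        0b0000_1111
    );
}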
30863
30864/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30865///
30866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
30867#[inline]
30868#[target_feature(enable = "avx512f,avx512vl")]
30869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30870#[rustc_legacy_const_generics(2)]
30871#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30872pub unsafe fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30873    a: __m256i,
30874    b: __m256i,
30875) -> __mmask8 {
30876    static_assert_uimm_bits!(IMM3, 3);
30877    let a = a.as_u64x4();
30878    let b = b.as_u64x4();
30879    let r = match IMM3 {
30880        0 => simd_eq(a, b),
30881        1 => simd_lt(a, b),
30882        2 => simd_le(a, b),
30883        3 => i64x4::ZERO,
30884        4 => simd_ne(a, b),
30885        5 => simd_ge(a, b),
30886        6 => simd_gt(a, b),
30887        _ => i64x4::splat(-1),
30888    };
30889    simd_bitmask(r)
30890}
30891
30892/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30893///
30894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
30895#[inline]
30896#[target_feature(enable = "avx512f,avx512vl")]
30897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30898#[rustc_legacy_const_generics(3)]
30899#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30900pub unsafe fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30901    k1: __mmask8,
30902    a: __m256i,
30903    b: __m256i,
30904) -> __mmask8 {
30905    static_assert_uimm_bits!(IMM3, 3);
30906    let a = a.as_u64x4();
30907    let b = b.as_u64x4();
30908    let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
30909    let r = match IMM3 {
30910        0 => simd_and(k1, simd_eq(a, b)),
30911        1 => simd_and(k1, simd_lt(a, b)),
30912        2 => simd_and(k1, simd_le(a, b)),
30913        3 => i64x4::ZERO,
30914        4 => simd_and(k1, simd_ne(a, b)),
30915        5 => simd_and(k1, simd_ge(a, b)),
30916        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1, // _MM_CMPINT_TRUE still respects the zeromask
30918    };
30919    simd_bitmask(r)
30920}
30921
30922/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30923///
30924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
30925#[inline]
30926#[target_feature(enable = "avx512f,avx512vl")]
30927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30928#[rustc_legacy_const_generics(2)]
30929#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30930pub unsafe fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
30931    static_assert_uimm_bits!(IMM3, 3);
30932    let a = a.as_u64x2();
30933    let b = b.as_u64x2();
30934    let r = match IMM3 {
30935        0 => simd_eq(a, b),
30936        1 => simd_lt(a, b),
30937        2 => simd_le(a, b),
30938        3 => i64x2::ZERO,
30939        4 => simd_ne(a, b),
30940        5 => simd_ge(a, b),
30941        6 => simd_gt(a, b),
30942        _ => i64x2::splat(-1),
30943    };
30944    simd_bitmask(r)
30945}
30946
30947/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30948///
30949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
30950#[inline]
30951#[target_feature(enable = "avx512f,avx512vl")]
30952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30953#[rustc_legacy_const_generics(3)]
30954#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
30955pub unsafe fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
30956    k1: __mmask8,
30957    a: __m128i,
30958    b: __m128i,
30959) -> __mmask8 {
30960    static_assert_uimm_bits!(IMM3, 3);
30961    let a = a.as_u64x2();
30962    let b = b.as_u64x2();
30963    let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
30964    let r = match IMM3 {
30965        0 => simd_and(k1, simd_eq(a, b)),
30966        1 => simd_and(k1, simd_lt(a, b)),
30967        2 => simd_and(k1, simd_le(a, b)),
30968        3 => i64x2::ZERO,
30969        4 => simd_and(k1, simd_ne(a, b)),
30970        5 => simd_and(k1, simd_ge(a, b)),
30971        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1, // _MM_CMPINT_TRUE still respects the zeromask
30973    };
30974    simd_bitmask(r)
30975}
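
// Illustrative sketch (not from the upstream source): the named comparison
// wrappers above produce the same mask as the matching _MM_CMPINT_* predicate
// of the generic form. Helper name and inputs are assumptions; requires an
// AVX512F+AVX512VL CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cmp_epu64_wrappers() {
    let a = _mm_set_epi64x(7, 7);
    let b = _mm_set_epi64x(7, 0);
    // Only lane 0 differs, so both spellings report mask bit 0.
    let via_wrapper = _mm_cmpneq_epu64_mask(a, b);
    let via_predicate = _mm_cmp_epu64_mask::<_MM_CMPINT_NE>(a, b);
    assert_eq!(via_wrapper, 0b01);
    assert_eq!(via_wrapper, via_predicate);
}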
30976
30977/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
30978///
30979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
30980#[inline]
30981#[target_feature(enable = "avx512f")]
30982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30983#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30984pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
30985    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
30986}
30987
30988/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30989///
30990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
30991#[inline]
30992#[target_feature(enable = "avx512f")]
30993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30994#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
30995pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
30996    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
30997}
30998
30999/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
31000///
31001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
31002#[inline]
31003#[target_feature(enable = "avx512f,avx512vl")]
31004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31005#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31006pub unsafe fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
31007    simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4()))
31008}
31009
31010/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31011///
31012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
31013#[inline]
31014#[target_feature(enable = "avx512f,avx512vl")]
31015#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31016#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31017pub unsafe fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31018    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
31019}
31020
31021/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
31022///
31023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
31024#[inline]
31025#[target_feature(enable = "avx512f,avx512vl")]
31026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31027#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31028pub unsafe fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
31029    simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2()))
31030}
31031
31032/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31033///
31034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
31035#[inline]
31036#[target_feature(enable = "avx512f,avx512vl")]
31037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31038#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31039pub unsafe fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31040    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
31041}
31042
31043/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
31044///
31045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
31046#[inline]
31047#[target_feature(enable = "avx512f")]
31048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31049#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31050pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
31051    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
31052}
31053
31054/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31055///
31056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
31057#[inline]
31058#[target_feature(enable = "avx512f")]
31059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31060#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31061pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
31062    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
31063}
31064
31065/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
31066///
31067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
31068#[inline]
31069#[target_feature(enable = "avx512f,avx512vl")]
31070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31071#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31072pub unsafe fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
31073    simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
31074}
31075
31076/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31077///
31078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
31079#[inline]
31080#[target_feature(enable = "avx512f,avx512vl")]
31081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31082#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31083pub unsafe fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31084    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
31085}
31086
31087/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
31088///
31089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
31090#[inline]
31091#[target_feature(enable = "avx512f,avx512vl")]
31092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31093#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31094pub unsafe fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
31095    simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2()))
31096}
31097
31098/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31099///
31100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
31101#[inline]
31102#[target_feature(enable = "avx512f,avx512vl")]
31103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31104#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31105pub unsafe fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31106    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
31107}
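
// Illustrative sketch (not from the upstream source): the epi64 comparisons are
// signed, so a lane holding -1 is less than 1 here even though the same bit
// pattern is u64::MAX for the epu64 variants. Helper name and inputs are
// assumptions; requires an AVX512F+AVX512VL CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_cmpgt_signed_vs_unsigned() {
    let a = _mm_set1_epi64x(-1);
    let b = _mm_set1_epi64x(1);
    // Signed: -1 > 1 is false in both lanes.
    assert_eq!(_mm_cmpgt_epi64_mask(a, b), 0b00);
    // Unsigned: the same bits read as u64::MAX, which is greater than 1.
    assert_eq!(_mm_cmpgt_epu64_mask(a, b), 0b11);
}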
31108
31109/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31110///
31111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
31112#[inline]
31113#[target_feature(enable = "avx512f")]
31114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31115#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31116pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
31117    simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
31118}
31119
31120/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31121///
31122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
31123#[inline]
31124#[target_feature(enable = "avx512f")]
31125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31126#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31127pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
31128    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
31129}
31130
31131/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31132///
31133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
31134#[inline]
31135#[target_feature(enable = "avx512f,avx512vl")]
31136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31137#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31138pub unsafe fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
31139    simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4()))
31140}
31141
31142/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31143///
31144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
31145#[inline]
31146#[target_feature(enable = "avx512f,avx512vl")]
31147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31148#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31149pub unsafe fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31150    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
31151}
31152
31153/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31154///
31155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
31156#[inline]
31157#[target_feature(enable = "avx512f,avx512vl")]
31158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31159#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31160pub unsafe fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
31161    simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2()))
31162}
31163
31164/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31165///
31166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
31167#[inline]
31168#[target_feature(enable = "avx512f,avx512vl")]
31169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31170#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31171pub unsafe fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31172    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
31173}
31174
31175/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31176///
31177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
31178#[inline]
31179#[target_feature(enable = "avx512f")]
31180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31181#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31182pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
31183    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
31184}
31185
31186/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31187///
31188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
31189#[inline]
31190#[target_feature(enable = "avx512f")]
31191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31192#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31193pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
31194    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
31195}
31196
31197/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31198///
31199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
31200#[inline]
31201#[target_feature(enable = "avx512f,avx512vl")]
31202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31203#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31204pub unsafe fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
31205    simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4()))
31206}
31207
31208/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31209///
31210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
31211#[inline]
31212#[target_feature(enable = "avx512f,avx512vl")]
31213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31214#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31215pub unsafe fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31216    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
31217}
31218
31219/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31220///
31221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
31222#[inline]
31223#[target_feature(enable = "avx512f,avx512vl")]
31224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31225#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31226pub unsafe fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
31227    simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2()))
31228}
31229
31230/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31231///
31232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
31233#[inline]
31234#[target_feature(enable = "avx512f,avx512vl")]
31235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31236#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31237pub unsafe fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31238    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
31239}
31240
31241/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
31242///
31243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
31244#[inline]
31245#[target_feature(enable = "avx512f")]
31246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31247#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31248pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
31249    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
31250}
31251
31252/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31253///
31254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
31255#[inline]
31256#[target_feature(enable = "avx512f")]
31257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31258#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31259pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
31260    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
31261}
31262
31263/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
31264///
31265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
31266#[inline]
31267#[target_feature(enable = "avx512f,avx512vl")]
31268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31269#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31270pub unsafe fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
31271    simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
31272}
31273
31274/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31275///
31276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
31277#[inline]
31278#[target_feature(enable = "avx512f,avx512vl")]
31279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31280#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31281pub unsafe fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31282    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
31283}
31284
31285/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
31286///
31287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
31288#[inline]
31289#[target_feature(enable = "avx512f,avx512vl")]
31290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31291#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31292pub unsafe fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
31293    simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2()))
31294}
31295
31296/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31297///
31298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
31299#[inline]
31300#[target_feature(enable = "avx512f,avx512vl")]
31301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31302#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31303pub unsafe fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31304    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
31305}
31306
31307/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
31308///
31309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
31310#[inline]
31311#[target_feature(enable = "avx512f")]
31312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31313#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31314pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
31315    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
31316}
31317
31318/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31319///
31320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
31321#[inline]
31322#[target_feature(enable = "avx512f")]
31323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31324#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31325pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
31326    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
31327}
31328
31329/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
31330///
31331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
31332#[inline]
31333#[target_feature(enable = "avx512f,avx512vl")]
31334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31335#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31336pub unsafe fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
31337    simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4()))
31338}
31339
31340/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31341///
31342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
31343#[inline]
31344#[target_feature(enable = "avx512f,avx512vl")]
31345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31346#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31347pub unsafe fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31348    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
31349}
31350
31351/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
31352///
31353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
31354#[inline]
31355#[target_feature(enable = "avx512f,avx512vl")]
31356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31357#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31358pub unsafe fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
31359    simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2()))
31360}
31361
31362/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31363///
31364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
31365#[inline]
31366#[target_feature(enable = "avx512f,avx512vl")]
31367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31368#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
31369pub unsafe fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31370    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
31371}
31372
31373/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31374///
31375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
31376#[inline]
31377#[target_feature(enable = "avx512f")]
31378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31379#[rustc_legacy_const_generics(2)]
31380#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31381pub unsafe fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31382    a: __m512i,
31383    b: __m512i,
31384) -> __mmask8 {
31385    static_assert_uimm_bits!(IMM3, 3);
31386    let a = a.as_i64x8();
31387    let b = b.as_i64x8();
31388    let r = match IMM3 {
31389        0 => simd_eq(a, b),
31390        1 => simd_lt(a, b),
31391        2 => simd_le(a, b),
31392        3 => i64x8::ZERO,
31393        4 => simd_ne(a, b),
31394        5 => simd_ge(a, b),
31395        6 => simd_gt(a, b),
31396        _ => i64x8::splat(-1),
31397    };
31398    simd_bitmask(r)
31399}
31400
31401/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31402///
31403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
31404#[inline]
31405#[target_feature(enable = "avx512f")]
31406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31407#[rustc_legacy_const_generics(3)]
31408#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31409pub unsafe fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31410    k1: __mmask8,
31411    a: __m512i,
31412    b: __m512i,
31413) -> __mmask8 {
31414    static_assert_uimm_bits!(IMM3, 3);
31415    let a = a.as_i64x8();
31416    let b = b.as_i64x8();
31417    let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
31418    let r = match IMM3 {
31419        0 => simd_and(k1, simd_eq(a, b)),
31420        1 => simd_and(k1, simd_lt(a, b)),
31421        2 => simd_and(k1, simd_le(a, b)),
31422        3 => i64x8::ZERO,
31423        4 => simd_and(k1, simd_ne(a, b)),
31424        5 => simd_and(k1, simd_ge(a, b)),
31425        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1, // _MM_CMPINT_TRUE still respects the zeromask
31427    };
31428    simd_bitmask(r)
31429}
31430
31431/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31432///
31433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
31434#[inline]
31435#[target_feature(enable = "avx512f,avx512vl")]
31436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31437#[rustc_legacy_const_generics(2)]
31438#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31439pub unsafe fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31440    a: __m256i,
31441    b: __m256i,
31442) -> __mmask8 {
31443    static_assert_uimm_bits!(IMM3, 3);
31444    let a = a.as_i64x4();
31445    let b = b.as_i64x4();
31446    let r = match IMM3 {
31447        0 => simd_eq(a, b),
31448        1 => simd_lt(a, b),
31449        2 => simd_le(a, b),
31450        3 => i64x4::ZERO,
31451        4 => simd_ne(a, b),
31452        5 => simd_ge(a, b),
31453        6 => simd_gt(a, b),
31454        _ => i64x4::splat(-1),
31455    };
31456    simd_bitmask(r)
31457}
31458
31459/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31460///
31461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
31462#[inline]
31463#[target_feature(enable = "avx512f,avx512vl")]
31464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31465#[rustc_legacy_const_generics(3)]
31466#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31467pub unsafe fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31468    k1: __mmask8,
31469    a: __m256i,
31470    b: __m256i,
31471) -> __mmask8 {
31472    static_assert_uimm_bits!(IMM3, 3);
31473    let a = a.as_i64x4();
31474    let b = b.as_i64x4();
31475    let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
31476    let r = match IMM3 {
31477        0 => simd_and(k1, simd_eq(a, b)),
31478        1 => simd_and(k1, simd_lt(a, b)),
31479        2 => simd_and(k1, simd_le(a, b)),
31480        3 => i64x4::ZERO,
31481        4 => simd_and(k1, simd_ne(a, b)),
31482        5 => simd_and(k1, simd_ge(a, b)),
31483        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1, // _MM_CMPINT_TRUE still respects the zeromask
31485    };
31486    simd_bitmask(r)
31487}
31488
31489/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31490///
31491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
31492#[inline]
31493#[target_feature(enable = "avx512f,avx512vl")]
31494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31495#[rustc_legacy_const_generics(2)]
31496#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31497pub unsafe fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31498    static_assert_uimm_bits!(IMM3, 3);
31499    let a = a.as_i64x2();
31500    let b = b.as_i64x2();
31501    let r = match IMM3 {
31502        0 => simd_eq(a, b),
31503        1 => simd_lt(a, b),
31504        2 => simd_le(a, b),
31505        3 => i64x2::ZERO,
31506        4 => simd_ne(a, b),
31507        5 => simd_ge(a, b),
31508        6 => simd_gt(a, b),
31509        _ => i64x2::splat(-1),
31510    };
31511    simd_bitmask(r)
31512}
31513
31514/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31515///
31516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
31517#[inline]
31518#[target_feature(enable = "avx512f,avx512vl")]
31519#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31520#[rustc_legacy_const_generics(3)]
31521#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31522pub unsafe fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
31523    k1: __mmask8,
31524    a: __m128i,
31525    b: __m128i,
31526) -> __mmask8 {
31527    static_assert_uimm_bits!(IMM3, 3);
31528    let a = a.as_i64x2();
31529    let b = b.as_i64x2();
31530    let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
31531    let r = match IMM3 {
31532        0 => simd_and(k1, simd_eq(a, b)),
31533        1 => simd_and(k1, simd_lt(a, b)),
31534        2 => simd_and(k1, simd_le(a, b)),
31535        3 => i64x2::ZERO,
31536        4 => simd_and(k1, simd_ne(a, b)),
31537        5 => simd_and(k1, simd_ge(a, b)),
31538        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1, // _MM_CMPINT_TRUE still respects the zeromask
31540    };
31541    simd_bitmask(r)
31542}
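
// Illustrative sketch (not from the upstream source): with __m128i there are
// only two 64-bit lanes, so only bits 0 and 1 of the returned __mmask8 (and of
// k1) are meaningful. Helper name and inputs are assumptions; requires an
// AVX512F+AVX512VL CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_cmp_epi64_two_lanes() {
    let a = _mm_set_epi64x(-4, 10);
    let b = _mm_set1_epi64x(0);
    // Lane 1 (-4) is below zero, lane 0 (10) is not.
    assert_eq!(_mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(0b11, a, b), 0b10);
    // Masking off lane 1 leaves nothing set.
    assert_eq!(_mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(0b01, a, b), 0b00);
}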
31543
31544/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
31545///
31546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
31547#[inline]
31548#[target_feature(enable = "avx512f")]
31549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31550pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
31551    simd_reduce_add_unordered(a.as_i32x16())
31552}
31553
31554/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
31555///
31556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
31557#[inline]
31558#[target_feature(enable = "avx512f")]
31559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31560pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
31561    simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO))
31562}
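
// Illustrative sketch (not from the upstream source): masked-off lanes
// contribute the additive identity 0 to the sum. Helper name and inputs are
// assumptions; requires an AVX512F CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_reduce_add_epi32() {
    let a = _mm512_set1_epi32(5);
    // All sixteen lanes active: 16 * 5.
    assert_eq!(_mm512_reduce_add_epi32(a), 80);
    // Only the two low lanes active: 2 * 5.
    assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_0000_0011, a), 10);
}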
31563
31564/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
31565///
31566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
31567#[inline]
31568#[target_feature(enable = "avx512f")]
31569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31570pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
31571    simd_reduce_add_unordered(a.as_i64x8())
31572}
31573
31574/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
31575///
31576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
31577#[inline]
31578#[target_feature(enable = "avx512f")]
31579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31580pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
31581    simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO))
31582}
31583
31584/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
31585///
31586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
31587#[inline]
31588#[target_feature(enable = "avx512f")]
31589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31590pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
31591    // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
31592    let a = _mm256_add_ps(
31593        simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
31594        simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
31595    );
31596    let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
31597    let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
31598    simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
31599}
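
// Illustrative sketch (not from the upstream source): the float reduction sums
// the lanes pairwise (512 -> 256 -> 128 -> scalar), so the result can differ
// from a strict left-to-right sum by the usual floating-point rounding. Helper
// name and inputs are assumptions; requires an AVX512F CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_reduce_add_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    // 1 + 2 + ... + 16 = 136; every intermediate sum is exactly representable,
    // so the pairing order does not matter for this input.
    assert_eq!(_mm512_reduce_add_ps(a), 136.0);
}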
31600
31601/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
31602///
31603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
31604#[inline]
31605#[target_feature(enable = "avx512f")]
31606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31607pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
31608    _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps()))
31609}
31610
31611/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
31612///
31613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
31614#[inline]
31615#[target_feature(enable = "avx512f")]
31616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31617pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
31618    let a = _mm256_add_pd(
31619        _mm512_extractf64x4_pd::<0>(a),
31620        _mm512_extractf64x4_pd::<1>(a),
31621    );
31622    let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
31623    simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
31624}
31625
31626/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
31627///
31628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
31629#[inline]
31630#[target_feature(enable = "avx512f")]
31631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31632pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
31633    _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd()))
31634}
31635
31636/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
31637///
31638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
31639#[inline]
31640#[target_feature(enable = "avx512f")]
31641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31642pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
31643    simd_reduce_mul_unordered(a.as_i32x16())
31644}
31645
31646/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
31647///
31648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
31649#[inline]
31650#[target_feature(enable = "avx512f")]
31651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31652pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
31653    simd_reduce_mul_unordered(simd_select_bitmask(
31654        k,
31655        a.as_i32x16(),
31656        _mm512_set1_epi32(1).as_i32x16(),
31657    ))
31658}
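
// Illustrative sketch (not from the upstream source): for the product reduction
// the masked-off lanes are replaced with the multiplicative identity 1, so an
// all-zero mask yields 1. Helper name and inputs are assumptions; requires an
// AVX512F CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_reduce_mul_epi32() {
    let a = _mm512_set1_epi32(2);
    // Three active lanes: 2 * 2 * 2.
    assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_0111, a), 8);
    // No active lanes: only identity elements remain.
    assert_eq!(_mm512_mask_reduce_mul_epi32(0, a), 1);
}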
31659
31660/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
31661///
31662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
31663#[inline]
31664#[target_feature(enable = "avx512f")]
31665#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31666pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
31667    simd_reduce_mul_unordered(a.as_i64x8())
31668}
31669
31670/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
31671///
31672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
31673#[inline]
31674#[target_feature(enable = "avx512f")]
31675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31676pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
31677    simd_reduce_mul_unordered(simd_select_bitmask(
31678        k,
31679        a.as_i64x8(),
31680        _mm512_set1_epi64(1).as_i64x8(),
31681    ))
31682}
31683
31684/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
31685///
31686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
31687#[inline]
31688#[target_feature(enable = "avx512f")]
31689#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31690pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
31691    // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
31692    let a = _mm256_mul_ps(
31693        simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
31694        simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
31695    );
31696    let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
31697    let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
31698    simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
31699}
31700
31701/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
31702///
31703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
31704#[inline]
31705#[target_feature(enable = "avx512f")]
31706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31707pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
31708    _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.)))
31709}
31710
31711/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
31712///
31713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
31714#[inline]
31715#[target_feature(enable = "avx512f")]
31716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31717pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
31718    let a = _mm256_mul_pd(
31719        _mm512_extractf64x4_pd::<0>(a),
31720        _mm512_extractf64x4_pd::<1>(a),
31721    );
31722    let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
31723    simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
31724}
31725
31726/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
31727///
31728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
31729#[inline]
31730#[target_feature(enable = "avx512f")]
31731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31732pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
31733    _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.)))
31734}
31735
31736/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
31737///
31738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
31739#[inline]
31740#[target_feature(enable = "avx512f")]
31741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31742pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
31743    simd_reduce_max(a.as_i32x16())
31744}
31745
31746/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31747///
31748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
31749#[inline]
31750#[target_feature(enable = "avx512f")]
31751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31752pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
31753    simd_reduce_max(simd_select_bitmask(
31754        k,
31755        a.as_i32x16(),
31756        i32x16::splat(i32::MIN),
31757    ))
31758}
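
// Illustrative sketch (not from the upstream source): inactive lanes are filled
// with i32::MIN before taking the maximum (the unsigned epu32/epu64 variants
// below use 0 instead), so an all-zero mask returns i32::MIN. Helper name and
// inputs are assumptions; requires an AVX512F CPU at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_reduce_max_epi32() {
    let a = _mm512_set1_epi32(-7);
    // One active lane: the maximum of the active elements is -7.
    assert_eq!(_mm512_mask_reduce_max_epi32(0b1, a), -7);
    // No active lanes: only the i32::MIN fill values remain.
    assert_eq!(_mm512_mask_reduce_max_epi32(0, a), i32::MIN);
}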
31759
31760/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
31761///
31762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
31763#[inline]
31764#[target_feature(enable = "avx512f")]
31765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31766pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
31767    simd_reduce_max(a.as_i64x8())
31768}
31769
31770/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31771///
31772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
31773#[inline]
31774#[target_feature(enable = "avx512f")]
31775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31776pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
31777    simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN)))
31778}
31779
31780/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
31781///
31782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
31783#[inline]
31784#[target_feature(enable = "avx512f")]
31785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31786pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
31787    simd_reduce_max(a.as_u32x16())
31788}
31789
31790/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31791///
31792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
31793#[inline]
31794#[target_feature(enable = "avx512f")]
31795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31796pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
31797    simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO))
31798}
31799
31800/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
31801///
31802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
31803#[inline]
31804#[target_feature(enable = "avx512f")]
31805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31806pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
31807    simd_reduce_max(a.as_u64x8())
31808}
31809
31810/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
31811///
31812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
31813#[inline]
31814#[target_feature(enable = "avx512f")]
31815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31816pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
31817    simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO))
31818}
31819
31820/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
31821///
31822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
31823#[inline]
31824#[target_feature(enable = "avx512f")]
31825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31826pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 {
31827    let a = _mm256_max_ps(
31828        simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
31829        simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
31830    );
31831    let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
31832    let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
31833    _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
31834}
31835
31836/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
31837///
31838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
31839#[inline]
31840#[target_feature(enable = "avx512f")]
31841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31842pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
31843    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
31844}
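
// Sketch (hypothetical test helper): the masked float maximum fills inactive
// lanes with `f32::MIN` before reducing, so masked-off lanes cannot affect
// the result. Assumes `avx512f` is available.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_reduce_max_ps_sketch() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 99.,
    );
    // Lane 15 (99.0) is inactive; the maximum over the first eight lanes is 8.0.
    assert_eq!(_mm512_mask_reduce_max_ps(0b0000_0000_1111_1111, a), 8.0);
}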
31845
31846/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
31847///
31848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
31849#[inline]
31850#[target_feature(enable = "avx512f")]
31851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31852pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
31853    let a = _mm256_max_pd(
31854        _mm512_extractf64x4_pd::<0>(a),
31855        _mm512_extractf64x4_pd::<1>(a),
31856    );
31857    let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
31858    _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
31859}
31860
31861/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
31862///
31863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
31864#[inline]
31865#[target_feature(enable = "avx512f")]
31866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31867pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
31868    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
31869}
31870
31871/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
31872///
31873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
31874#[inline]
31875#[target_feature(enable = "avx512f")]
31876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31877pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
31878    simd_reduce_min(a.as_i32x16())
31879}
31880
31881/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
31882///
31883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
31884#[inline]
31885#[target_feature(enable = "avx512f")]
31886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31887pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
31888    simd_reduce_min(simd_select_bitmask(
31889        k,
31890        a.as_i32x16(),
31891        i32x16::splat(i32::MAX),
31892    ))
31893}
31894
31895/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
31896///
31897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
31898#[inline]
31899#[target_feature(enable = "avx512f")]
31900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31901pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
31902    simd_reduce_min(a.as_i64x8())
31903}
31904
31905/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
31906///
31907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
31908#[inline]
31909#[target_feature(enable = "avx512f")]
31910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31911pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
31912    simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX)))
31913}
31914
31915/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
31916///
31917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
31918#[inline]
31919#[target_feature(enable = "avx512f")]
31920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31921pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
31922    simd_reduce_min(a.as_u32x16())
31923}
31924
31925/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
31926///
31927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
31928#[inline]
31929#[target_feature(enable = "avx512f")]
31930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31931pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
31932    simd_reduce_min(simd_select_bitmask(
31933        k,
31934        a.as_u32x16(),
31935        u32x16::splat(u32::MAX),
31936    ))
31937}
31938
31939/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
31940///
31941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
31942#[inline]
31943#[target_feature(enable = "avx512f")]
31944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31945pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
31946    simd_reduce_min(a.as_u64x8())
31947}
31948
31949/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
31950///
31951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
31952#[inline]
31953#[target_feature(enable = "avx512f")]
31954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31955pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
31956    simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX)))
31957}
31958
31959/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
31960///
31961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
31962#[inline]
31963#[target_feature(enable = "avx512f")]
31964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31965pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 {
31966    let a = _mm256_min_ps(
31967        simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
31968        simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
31969    );
31970    let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
31971    let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
31972    _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
31973}
31974
31975/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
31976///
31977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
31978#[inline]
31979#[target_feature(enable = "avx512f")]
31980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31981pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
31982    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
31983}
31984
31985/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
31986///
31987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
31988#[inline]
31989#[target_feature(enable = "avx512f")]
31990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31991pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
31992    let a = _mm256_min_pd(
31993        _mm512_extractf64x4_pd::<0>(a),
31994        _mm512_extractf64x4_pd::<1>(a),
31995    );
31996    let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
31997    _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
31998}
31999
32000/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
32001///
32002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
32003#[inline]
32004#[target_feature(enable = "avx512f")]
32005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32006pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
32007    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
32008}
32009
32010/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
32011///
32012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
32013#[inline]
32014#[target_feature(enable = "avx512f")]
32015#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32016pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
32017    simd_reduce_and(a.as_i32x16())
32018}
32019
32020/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
32021///
32022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
32023#[inline]
32024#[target_feature(enable = "avx512f")]
32025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32026pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
32027    simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1)))
32028}
32029
32030/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
32031///
32032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
32033#[inline]
32034#[target_feature(enable = "avx512f")]
32035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32036pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
32037    simd_reduce_and(a.as_i64x8())
32038}
32039
32040/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
32041///
32042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
32043#[inline]
32044#[target_feature(enable = "avx512f")]
32045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32046pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
32047    simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1)))
32048}
32049
32050/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
32051///
32052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
32053#[inline]
32054#[target_feature(enable = "avx512f")]
32055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32056pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
32057    simd_reduce_or(a.as_i32x16())
32058}
32059
32060/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
32061///
32062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
32063#[inline]
32064#[target_feature(enable = "avx512f")]
32065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32066pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
32067    simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO))
32068}
32069
32070/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
32071///
32072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
32073#[inline]
32074#[target_feature(enable = "avx512f")]
32075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32076pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
32077    simd_reduce_or(a.as_i64x8())
32078}
32079
32080/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
32081///
32082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
32083#[inline]
32084#[target_feature(enable = "avx512f")]
32085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32086pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
32087    simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO))
32088}
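
// Sketch (hypothetical test helper): the masked bitwise reductions use the
// identity of the operation for inactive lanes (all-ones for AND, zero for
// OR), so masked-off elements drop out of the result. Assumes `avx512f`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_reduce_and_or_sketch() {
    let a = _mm512_setr_epi32(
        0b1100, 0b1010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    );
    let k: __mmask16 = 0b0000_0000_0000_0011; // only lanes 0 and 1 are active
    assert_eq!(_mm512_mask_reduce_and_epi32(k, a), 0b1000);
    assert_eq!(_mm512_mask_reduce_or_epi32(k, a), 0b1110);
}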
32089
32090/// Returns vector of type `__m512d` with indeterminate elements.
32091/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
32092/// In practice, this is equivalent to [`mem::zeroed`].
32093///
32094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
32095#[inline]
32096#[target_feature(enable = "avx512f")]
32097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32098// This intrinsic has no corresponding instruction.
32099pub unsafe fn _mm512_undefined_pd() -> __m512d {
32100    const { mem::zeroed() }
32101}
32102
32103/// Returns vector of type `__m512` with indeterminate elements.
32104/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
32105/// In practice, this is equivalent to [`mem::zeroed`].
32106///
32107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
32108#[inline]
32109#[target_feature(enable = "avx512f")]
32110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32111// This intrinsic has no corresponding instruction.
32112pub unsafe fn _mm512_undefined_ps() -> __m512 {
32113    const { mem::zeroed() }
32114}
32115
32116/// Returns vector of type `__m512i` with indeterminate elements.
32117/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
32118/// In practice, this is equivalent to [`mem::zeroed`].
32119///
32120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
32121#[inline]
32122#[target_feature(enable = "avx512f")]
32123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32124// This intrinsic has no corresponding instruction.
32125pub unsafe fn _mm512_undefined_epi32() -> __m512i {
32126    const { mem::zeroed() }
32127}
32128
32129/// Returns vector of type `__m512` with indeterminate elements.
32130/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
32131/// In practice, this is equivalent to [`mem::zeroed`].
32132///
32133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
32134#[inline]
32135#[target_feature(enable = "avx512f")]
32136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32137// This intrinsic has no corresponding instruction.
32138pub unsafe fn _mm512_undefined() -> __m512 {
32139    const { mem::zeroed() }
32140}
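
// Sketch (hypothetical test helper): the `_mm512_undefined*` intrinsics return
// some valid vector value rather than uninitialized memory, so the result is
// always safe to read. The zero contents asserted below reflect the current
// implementation (`mem::zeroed`) rather than a guarantee of the intrinsic.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn undefined_sketch() {
    let v = _mm512_undefined_epi32();
    let mut out = [1i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), v);
    // Every element holds *some* i32; in practice the vector is zeroed.
    assert!(out.iter().all(|&x| x == 0));
}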
32141
32142/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32143///
32144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
32145#[inline]
32146#[target_feature(enable = "avx512f")]
32147#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32148#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32149pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
32150    ptr::read_unaligned(mem_addr as *const __m512i)
32151}
32152
32153/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32154///
32155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
32156#[inline]
32157#[target_feature(enable = "avx512f,avx512vl")]
32158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32159#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32160pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
32161    ptr::read_unaligned(mem_addr as *const __m256i)
32162}
32163
32164/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32165///
32166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
32167#[inline]
32168#[target_feature(enable = "avx512f,avx512vl")]
32169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32170#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32171pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
32172    ptr::read_unaligned(mem_addr as *const __m128i)
32173}
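
// Sketch (hypothetical test helper): `_mm512_loadu_epi32` accepts any pointer,
// aligned or not, so a plain Rust array can be round-tripped through the
// vector type without alignment attributes. Assumes `avx512f`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn loadu_storeu_epi32_sketch() {
    let src: [i32; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
    let v = _mm512_loadu_epi32(src.as_ptr());
    let mut dst = [0i32; 16];
    _mm512_storeu_epi32(dst.as_mut_ptr(), v);
    assert_eq!(src, dst);
}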
32174
32175/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32176///
32177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
32178#[inline]
32179#[target_feature(enable = "avx512f")]
32180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32181#[cfg_attr(test, assert_instr(vpmovdw))]
32182pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
32183    vpmovdwmem(mem_addr, a.as_i32x16(), k);
32184}
32185
32186/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32187///
32188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
32189#[inline]
32190#[target_feature(enable = "avx512f,avx512vl")]
32191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32192#[cfg_attr(test, assert_instr(vpmovdw))]
32193pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32194    vpmovdwmem256(mem_addr, a.as_i32x8(), k);
32195}
32196
32197/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32198///
32199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
32200#[inline]
32201#[target_feature(enable = "avx512f,avx512vl")]
32202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32203#[cfg_attr(test, assert_instr(vpmovdw))]
32204pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32205    vpmovdwmem128(mem_addr, a.as_i32x4(), k);
32206}
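
// Sketch (hypothetical test helper): the masked down-converting stores only
// touch the destination lanes whose mask bit is set; every other element of
// the buffer is left untouched. Assumes `avx512f` is available.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_cvtepi32_storeu_epi16_sketch() {
    let a = _mm512_setr_epi32(
        0x0001_0001, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
    );
    let mut buf = [-1i16; 16];
    // Only lanes 0 and 2 are active; lane 0 truncates 0x0001_0001 to 1.
    _mm512_mask_cvtepi32_storeu_epi16(buf.as_mut_ptr() as *mut i8, 0b0000_0000_0000_0101, a);
    assert_eq!(buf[0], 1);
    assert_eq!(buf[1], -1); // untouched
    assert_eq!(buf[2], 3);
}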
32207
32208/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32209///
32210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
32211#[inline]
32212#[target_feature(enable = "avx512f")]
32213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32214#[cfg_attr(test, assert_instr(vpmovsdw))]
32215pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
32216    vpmovsdwmem(mem_addr, a.as_i32x16(), k);
32217}
32218
32219/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32220///
32221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
32222#[inline]
32223#[target_feature(enable = "avx512f,avx512vl")]
32224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32225#[cfg_attr(test, assert_instr(vpmovsdw))]
32226pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32227    vpmovsdwmem256(mem_addr, a.as_i32x8(), k);
32228}
32229
32230/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32231///
32232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
32233#[inline]
32234#[target_feature(enable = "avx512f,avx512vl")]
32235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32236#[cfg_attr(test, assert_instr(vpmovsdw))]
32237pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32238    vpmovsdwmem128(mem_addr, a.as_i32x4(), k);
32239}
32240
32241/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32242///
32243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
32244#[inline]
32245#[target_feature(enable = "avx512f")]
32246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32247#[cfg_attr(test, assert_instr(vpmovusdw))]
32248pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
32249    vpmovusdwmem(mem_addr, a.as_i32x16(), k);
32250}
32251
32252/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32253///
32254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
32255#[inline]
32256#[target_feature(enable = "avx512f,avx512vl")]
32257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32258#[cfg_attr(test, assert_instr(vpmovusdw))]
32259pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32260    vpmovusdwmem256(mem_addr, a.as_i32x8(), k);
32261}
32262
32263/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32264///
32265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
32266#[inline]
32267#[target_feature(enable = "avx512f,avx512vl")]
32268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32269#[cfg_attr(test, assert_instr(vpmovusdw))]
32270pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32271    vpmovusdwmem128(mem_addr, a.as_i32x4(), k);
32272}
32273
32274/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32275///
32276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
32277#[inline]
32278#[target_feature(enable = "avx512f")]
32279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32280#[cfg_attr(test, assert_instr(vpmovdb))]
32281pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
32282    vpmovdbmem(mem_addr, a.as_i32x16(), k);
32283}
32284
32285/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32286///
32287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
32288#[inline]
32289#[target_feature(enable = "avx512f,avx512vl")]
32290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32291#[cfg_attr(test, assert_instr(vpmovdb))]
32292pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32293    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
32294}
32295
32296/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32297///
32298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
32299#[inline]
32300#[target_feature(enable = "avx512f,avx512vl")]
32301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32302#[cfg_attr(test, assert_instr(vpmovdb))]
32303pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32304    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
32305}
32306
32307/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32308///
32309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
32310#[inline]
32311#[target_feature(enable = "avx512f")]
32312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32313#[cfg_attr(test, assert_instr(vpmovsdb))]
32314pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
32315    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
32316}
32317
32318/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32319///
32320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
32321#[inline]
32322#[target_feature(enable = "avx512f,avx512vl")]
32323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32324#[cfg_attr(test, assert_instr(vpmovsdb))]
32325pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32326    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
32327}
32328
32329/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32330///
32331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
32332#[inline]
32333#[target_feature(enable = "avx512f,avx512vl")]
32334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32335#[cfg_attr(test, assert_instr(vpmovsdb))]
32336pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32337    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
32338}
32339
32340/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32341///
32342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
32343#[inline]
32344#[target_feature(enable = "avx512f")]
32345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32346#[cfg_attr(test, assert_instr(vpmovusdb))]
32347pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
32348    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
32349}
32350
32351/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32352///
32353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
32354#[inline]
32355#[target_feature(enable = "avx512f,avx512vl")]
32356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32357#[cfg_attr(test, assert_instr(vpmovusdb))]
32358pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32359    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
32360}
32361
32362/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32363///
32364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
32365#[inline]
32366#[target_feature(enable = "avx512f,avx512vl")]
32367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32368#[cfg_attr(test, assert_instr(vpmovusdb))]
32369pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32370    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
32371}
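
// Sketch (hypothetical test helper): the unsigned-saturating down-conversion
// clamps each active 32-bit element to the 0..=255 range before it is written;
// destination bytes for inactive lanes are left untouched. Assumes `avx512f`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_cvtusepi32_storeu_epi8_sketch() {
    let a = _mm512_setr_epi32(300, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let mut buf = [0xAAu8; 16];
    // Only lanes 0 and 1 are active: 300 saturates to 255, 7 is stored as-is.
    _mm512_mask_cvtusepi32_storeu_epi8(buf.as_mut_ptr() as *mut i8, 0b0000_0000_0000_0011, a);
    assert_eq!(buf[0], 255);
    assert_eq!(buf[1], 7);
    assert_eq!(buf[2], 0xAA); // untouched
}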
32372
32373/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32374///
32375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
32376#[inline]
32377#[target_feature(enable = "avx512f")]
32378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32379#[cfg_attr(test, assert_instr(vpmovqw))]
32380pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32381    vpmovqwmem(mem_addr, a.as_i64x8(), k);
32382}
32383
32384/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32385///
32386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
32387#[inline]
32388#[target_feature(enable = "avx512f,avx512vl")]
32389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32390#[cfg_attr(test, assert_instr(vpmovqw))]
32391pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32392    vpmovqwmem256(mem_addr, a.as_i64x4(), k);
32393}
32394
32395/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32396///
32397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
32398#[inline]
32399#[target_feature(enable = "avx512f,avx512vl")]
32400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32401#[cfg_attr(test, assert_instr(vpmovqw))]
32402pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32403    vpmovqwmem128(mem_addr, a.as_i64x2(), k);
32404}
32405
32406/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32407///
32408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
32409#[inline]
32410#[target_feature(enable = "avx512f")]
32411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32412#[cfg_attr(test, assert_instr(vpmovsqw))]
32413pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32414    vpmovsqwmem(mem_addr, a.as_i64x8(), k);
32415}
32416
32417/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32418///
32419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
32420#[inline]
32421#[target_feature(enable = "avx512f,avx512vl")]
32422#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32423#[cfg_attr(test, assert_instr(vpmovsqw))]
32424pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32425    vpmovsqwmem256(mem_addr, a.as_i64x4(), k);
32426}
32427
32428/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32429///
32430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
32431#[inline]
32432#[target_feature(enable = "avx512f,avx512vl")]
32433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32434#[cfg_attr(test, assert_instr(vpmovsqw))]
32435pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32436    vpmovsqwmem128(mem_addr, a.as_i64x2(), k);
32437}
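
// Sketch (hypothetical test helper): the signed-saturating 64-to-16-bit store
// clamps out-of-range values to `i16::MIN`/`i16::MAX` instead of truncating
// their low bits. Assumes `avx512f` is available.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_cvtsepi64_storeu_epi16_sketch() {
    let a = _mm512_setr_epi64(40_000, -40_000, 7, 0, 0, 0, 0, 0);
    let mut buf = [0i16; 8];
    // The low three lanes are active; the first two saturate.
    _mm512_mask_cvtsepi64_storeu_epi16(buf.as_mut_ptr() as *mut i8, 0b0000_0111, a);
    assert_eq!(&buf[..3], &[i16::MAX, i16::MIN, 7]);
}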
32438
32439/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32440///
32441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
32442#[inline]
32443#[target_feature(enable = "avx512f")]
32444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32445#[cfg_attr(test, assert_instr(vpmovusqw))]
32446pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32447    vpmovusqwmem(mem_addr, a.as_i64x8(), k);
32448}
32449
32450/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32451///
32452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
32453#[inline]
32454#[target_feature(enable = "avx512f,avx512vl")]
32455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32456#[cfg_attr(test, assert_instr(vpmovusqw))]
32457pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32458    vpmovusqwmem256(mem_addr, a.as_i64x4(), k);
32459}
32460
32461/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32462///
32463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
32464#[inline]
32465#[target_feature(enable = "avx512f,avx512vl")]
32466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32467#[cfg_attr(test, assert_instr(vpmovusqw))]
32468pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32469    vpmovusqwmem128(mem_addr, a.as_i64x2(), k);
32470}
32471
32472/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32473///
32474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
32475#[inline]
32476#[target_feature(enable = "avx512f")]
32477#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32478#[cfg_attr(test, assert_instr(vpmovqb))]
32479pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32480    vpmovqbmem(mem_addr, a.as_i64x8(), k);
32481}
32482
32483/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32484///
32485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
32486#[inline]
32487#[target_feature(enable = "avx512f,avx512vl")]
32488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32489#[cfg_attr(test, assert_instr(vpmovqb))]
32490pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32491    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
32492}
32493
32494/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32495///
32496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
32497#[inline]
32498#[target_feature(enable = "avx512f,avx512vl")]
32499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32500#[cfg_attr(test, assert_instr(vpmovqb))]
32501pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32502    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
32503}
32504
32505/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32506///
32507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
32508#[inline]
32509#[target_feature(enable = "avx512f")]
32510#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32511#[cfg_attr(test, assert_instr(vpmovsqb))]
32512pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32513    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
32514}
32515
32516/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32517///
32518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
32519#[inline]
32520#[target_feature(enable = "avx512f,avx512vl")]
32521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32522#[cfg_attr(test, assert_instr(vpmovsqb))]
32523pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32524    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
32525}
32526
32527/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32528///
32529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
32530#[inline]
32531#[target_feature(enable = "avx512f,avx512vl")]
32532#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32533#[cfg_attr(test, assert_instr(vpmovsqb))]
32534pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32535    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
32536}
32537
32538/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32539///
32540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
32541#[inline]
32542#[target_feature(enable = "avx512f")]
32543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32544#[cfg_attr(test, assert_instr(vpmovusqb))]
32545pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32546    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
32547}
32548
32549/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32550///
32551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
32552#[inline]
32553#[target_feature(enable = "avx512f,avx512vl")]
32554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32555#[cfg_attr(test, assert_instr(vpmovusqb))]
32556pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32557    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
32558}
32559
32560/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32561///
32562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
32563#[inline]
32564#[target_feature(enable = "avx512f,avx512vl")]
32565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32566#[cfg_attr(test, assert_instr(vpmovusqb))]
32567pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32568    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
32569}
32570
32571/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32572///
32573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
32574#[inline]
32575#[target_feature(enable = "avx512f")]
32576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32577#[cfg_attr(test, assert_instr(vpmovqd))]
32578pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32579    vpmovqdmem(mem_addr, a.as_i64x8(), k);
32580}
32581
32582/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32583///
32584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
32585#[inline]
32586#[target_feature(enable = "avx512f,avx512vl")]
32587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32588#[cfg_attr(test, assert_instr(vpmovqd))]
32589pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32590    vpmovqdmem256(mem_addr, a.as_i64x4(), k);
32591}
32592
32593/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32594///
32595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
32596#[inline]
32597#[target_feature(enable = "avx512f,avx512vl")]
32598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32599#[cfg_attr(test, assert_instr(vpmovqd))]
32600pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32601    vpmovqdmem128(mem_addr, a.as_i64x2(), k);
32602}
32603
32604/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32605///
32606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
32607#[inline]
32608#[target_feature(enable = "avx512f")]
32609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32610#[cfg_attr(test, assert_instr(vpmovsqd))]
32611pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32612    vpmovsqdmem(mem_addr, a.as_i64x8(), k);
32613}
32614
32615/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32616///
32617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
32618#[inline]
32619#[target_feature(enable = "avx512f,avx512vl")]
32620#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32621#[cfg_attr(test, assert_instr(vpmovsqd))]
32622pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32623    vpmovsqdmem256(mem_addr, a.as_i64x4(), k);
32624}
32625
32626/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32627///
32628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
32629#[inline]
32630#[target_feature(enable = "avx512f,avx512vl")]
32631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32632#[cfg_attr(test, assert_instr(vpmovsqd))]
32633pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32634    vpmovsqdmem128(mem_addr, a.as_i64x2(), k);
32635}
32636
32637/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32638///
32639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
32640#[inline]
32641#[target_feature(enable = "avx512f")]
32642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32643#[cfg_attr(test, assert_instr(vpmovusqd))]
32644pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
32645    vpmovusqdmem(mem_addr, a.as_i64x8(), k);
32646}
32647
32648/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32649///
32650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
32651#[inline]
32652#[target_feature(enable = "avx512f,avx512vl")]
32653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32654#[cfg_attr(test, assert_instr(vpmovusqd))]
32655pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
32656    vpmovusqdmem256(mem_addr, a.as_i64x4(), k);
32657}
32658
32659/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
32660///
32661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
32662#[inline]
32663#[target_feature(enable = "avx512f,avx512vl")]
32664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32665#[cfg_attr(test, assert_instr(vpmovusqd))]
32666pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
32667    vpmovusqdmem128(mem_addr, a.as_i64x2(), k);
32668}
32669
32670/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32671///
32672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
32673#[inline]
32674#[target_feature(enable = "avx512f")]
32675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32676#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32677pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
32678    ptr::write_unaligned(mem_addr as *mut __m512i, a);
32679}
32680
32681/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32682///
32683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
32684#[inline]
32685#[target_feature(enable = "avx512f,avx512vl")]
32686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32687#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32688pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
32689    ptr::write_unaligned(mem_addr as *mut __m256i, a);
32690}
32691
32692/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32693///
32694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
32695#[inline]
32696#[target_feature(enable = "avx512f,avx512vl")]
32697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32698#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32699pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
32700    ptr::write_unaligned(mem_addr as *mut __m128i, a);
32701}
32702
32703/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32704///
32705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
32706#[inline]
32707#[target_feature(enable = "avx512f")]
32708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32709#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32710pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
32711    ptr::read_unaligned(mem_addr as *const __m512i)
32712}
32713
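// A minimal usage sketch (illustrative only; the helper name is hypothetical):
// load eight 64-bit integers from the start of an array without any alignment
// requirement. Assumes AVX-512F support has been verified by the caller.
#[target_feature(enable = "avx512f")]
unsafe fn example_loadu_epi64(src: &[i64; 8]) -> __m512i {
    // Reads 64 bytes from `src`; the reference guarantees validity, not alignment.
    _mm512_loadu_epi64(src.as_ptr())
}
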
32714/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32715///
32716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
32717#[inline]
32718#[target_feature(enable = "avx512f,avx512vl")]
32719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32720#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32721pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
32722    ptr::read_unaligned(mem_addr as *const __m256i)
32723}
32724
32725/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32726///
32727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
32728#[inline]
32729#[target_feature(enable = "avx512f,avx512vl")]
32730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32731#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32732pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
32733    ptr::read_unaligned(mem_addr as *const __m128i)
32734}
32735
32736/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32737///
32738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
32739#[inline]
32740#[target_feature(enable = "avx512f")]
32741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32742#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32743pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
32744    ptr::write_unaligned(mem_addr as *mut __m512i, a);
32745}
32746
32747/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32748///
32749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
32750#[inline]
32751#[target_feature(enable = "avx512f,avx512vl")]
32752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32753#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32754pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
32755    ptr::write_unaligned(mem_addr as *mut __m256i, a);
32756}
32757
32758/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
32759///
32760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
32761#[inline]
32762#[target_feature(enable = "avx512f,avx512vl")]
32763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32764#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
32765pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
32766    ptr::write_unaligned(mem_addr as *mut __m128i, a);
32767}
32768
32769/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
32770///
32771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
32772#[inline]
32773#[target_feature(enable = "avx512f")]
32774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32775#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32776pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
32777    ptr::read_unaligned(mem_addr as *const __m512i)
32778}
32779
32780/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
32781///
32782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
32783#[inline]
32784#[target_feature(enable = "avx512f")]
32785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32786#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
32787pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
32788    ptr::write_unaligned(mem_addr, a);
32789}
32790
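// A minimal usage sketch (illustrative only; the helper name is hypothetical):
// copy 64 bytes of integer data between two unaligned buffers by pairing
// `_mm512_loadu_si512` with `_mm512_storeu_si512`. Note the pointer types the
// two intrinsics take here: `*const i32` for the load and `*mut __m512i` for
// the store. Assumes AVX-512F support has been verified by the caller.
#[target_feature(enable = "avx512f")]
unsafe fn example_copy_512(src: *const i32, dst: *mut __m512i) {
    let v = _mm512_loadu_si512(src);
    _mm512_storeu_si512(dst, v);
}
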
32791/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
32792/// floating-point elements) from memory into result.
32793/// `mem_addr` does not need to be aligned on any particular boundary.
32794///
32795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
32796#[inline]
32797#[target_feature(enable = "avx512f")]
32798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32799#[cfg_attr(test, assert_instr(vmovups))]
32800pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
32801    ptr::read_unaligned(mem_addr as *const __m512d)
32802}
32803
32804/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
32805/// floating-point elements) from `a` into memory.
32806/// `mem_addr` does not need to be aligned on any particular boundary.
32807///
32808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
32809#[inline]
32810#[target_feature(enable = "avx512f")]
32811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32812#[cfg_attr(test, assert_instr(vmovups))]
32813pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
32814    ptr::write_unaligned(mem_addr as *mut __m512d, a);
32815}
32816
32817/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
32818/// floating-point elements) from memory into result.
32819/// `mem_addr` does not need to be aligned on any particular boundary.
32820///
32821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
32822#[inline]
32823#[target_feature(enable = "avx512f")]
32824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32825#[cfg_attr(test, assert_instr(vmovups))]
32826pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
32827    ptr::read_unaligned(mem_addr as *const __m512)
32828}
32829
32830/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
32831/// floating-point elements) from `a` into memory.
32832/// `mem_addr` does not need to be aligned on any particular boundary.
32833///
32834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
32835#[inline]
32836#[target_feature(enable = "avx512f")]
32837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32838#[cfg_attr(test, assert_instr(vmovups))]
32839pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
32840    ptr::write_unaligned(mem_addr as *mut __m512, a);
32841}
32842
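// A minimal usage sketch (illustrative only; the helper name is hypothetical):
// scale sixteen `f32` values in place using an unaligned load and store.
// Assumes AVX-512F support has been verified by the caller.
#[target_feature(enable = "avx512f")]
unsafe fn example_scale_ps(data: &mut [f32; 16]) {
    let v = _mm512_loadu_ps(data.as_ptr());
    let scaled = _mm512_mul_ps(v, _mm512_set1_ps(0.5));
    _mm512_storeu_ps(data.as_mut_ptr(), scaled);
}
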
32843/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32844///
32845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
32846#[inline]
32847#[target_feature(enable = "avx512f")]
32848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32849#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32850pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
32851    ptr::read(mem_addr as *const __m512i)
32852}
32853
32854/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32855///
32856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
32857#[inline]
32858#[target_feature(enable = "avx512f")]
32859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32860#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32861pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
32862    ptr::write(mem_addr, a);
32863}
32864
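// A minimal usage sketch (illustrative only; the type and helper names are
// hypothetical): the aligned variants require a 64-byte-aligned address, which
// can be guaranteed in safe Rust with `#[repr(align(64))]`.
// Assumes AVX-512F support has been verified by the caller.
#[repr(align(64))]
struct Aligned64([i32; 16]);

#[target_feature(enable = "avx512f")]
unsafe fn example_aligned_roundtrip(buf: &mut Aligned64) {
    // Both pointers are 64-byte aligned thanks to the wrapper type above.
    let v = _mm512_load_si512(buf.0.as_ptr());
    _mm512_store_si512(buf.0.as_mut_ptr().cast::<__m512i>(), v);
}
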
32865/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32866///
32867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
32868#[inline]
32869#[target_feature(enable = "avx512f")]
32870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32871#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32872pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
32873    ptr::read(mem_addr as *const __m512i)
32874}
32875
32876/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32877///
32878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
32879#[inline]
32880#[target_feature(enable = "avx512f,avx512vl")]
32881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32882#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32883pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
32884    ptr::read(mem_addr as *const __m256i)
32885}
32886
32887/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32888///
32889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
32890#[inline]
32891#[target_feature(enable = "avx512f,avx512vl")]
32892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32893#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32894pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
32895    ptr::read(mem_addr as *const __m128i)
32896}
32897
32898/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32899///
32900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
32901#[inline]
32902#[target_feature(enable = "avx512f")]
32903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32904#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32905pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
32906    ptr::write(mem_addr as *mut __m512i, a);
32907}
32908
32909/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32915#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32916pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
32917    ptr::write(mem_addr as *mut __m256i, a);
32918}
32919
32920/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32921///
32922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
32923#[inline]
32924#[target_feature(enable = "avx512f,avx512vl")]
32925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32926#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
32927pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
32928    ptr::write(mem_addr as *mut __m128i, a);
32929}
32930
32931/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32932///
32933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
32934#[inline]
32935#[target_feature(enable = "avx512f")]
32936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32937#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32938pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
32939    ptr::read(mem_addr as *const __m512i)
32940}
32941
32942/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32943///
32944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
32945#[inline]
32946#[target_feature(enable = "avx512f,avx512vl")]
32947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32948#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32949pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
32950    ptr::read(mem_addr as *const __m256i)
32951}
32952
32953/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32954///
32955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
32956#[inline]
32957#[target_feature(enable = "avx512f,avx512vl")]
32958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32959#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32960pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
32961    ptr::read(mem_addr as *const __m128i)
32962}
32963
32964/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32965///
32966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
32967#[inline]
32968#[target_feature(enable = "avx512f")]
32969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32970#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32971pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
32972    ptr::write(mem_addr as *mut __m512i, a);
32973}
32974
32975/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
32976///
32977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
32978#[inline]
32979#[target_feature(enable = "avx512f,avx512vl")]
32980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32981#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32982pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
32983    ptr::write(mem_addr as *mut __m256i, a);
32984}
32985
32986/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
32987///
32988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
32989#[inline]
32990#[target_feature(enable = "avx512f,avx512vl")]
32991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32992#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
32993pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
32994    ptr::write(mem_addr as *mut __m128i, a);
32995}
32996
32997/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
32998///
32999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
33000#[inline]
33001#[target_feature(enable = "avx512f")]
33002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33003#[cfg_attr(test, assert_instr(vmovaps))]
33004pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
33005    ptr::read(mem_addr as *const __m512)
33006}
33007
33008/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33009///
33010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
33011#[inline]
33012#[target_feature(enable = "avx512f")]
33013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33014#[cfg_attr(test, assert_instr(vmovaps))]
33015pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
33016    ptr::write(mem_addr as *mut __m512, a);
33017}
33018
33019/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33020///
33021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
33022#[inline]
33023#[target_feature(enable = "avx512f")]
33024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33025#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
33026pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
33027    ptr::read(mem_addr as *const __m512d)
33028}
33029
33030/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33031///
33032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
33033#[inline]
33034#[target_feature(enable = "avx512f")]
33035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33036#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
33037pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
33038    ptr::write(mem_addr as *mut __m512d, a);
33039}
33040
33041/// Load packed 32-bit integers from memory into dst using writemask k
33042/// (elements are copied from src when the corresponding mask bit is not set).
33043/// mem_addr does not need to be aligned on any particular boundary.
33044///
33045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
33046#[inline]
33047#[target_feature(enable = "avx512f")]
33048#[cfg_attr(test, assert_instr(vmovdqu32))]
33049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33050pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
33051    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
33052}
33053
33054/// Load packed 32-bit integers from memory into dst using zeromask k
33055/// (elements are zeroed out when the corresponding mask bit is not set).
33056/// mem_addr does not need to be aligned on any particular boundary.
33057///
33058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
33059#[inline]
33060#[target_feature(enable = "avx512f")]
33061#[cfg_attr(test, assert_instr(vmovdqu32))]
33062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33063pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
33064    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
33065}
33066
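// A minimal usage sketch (illustrative only; the helper name is hypothetical):
// contrast the writemask and zeromask loads. With mask `0x00FF`, the low eight
// lanes come from memory; the high eight lanes are taken from `src` in the
// `mask_` form and are zeroed in the `maskz_` form.
// Assumes AVX-512F support has been verified by the caller.
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_loadu_epi32(src: __m512i, mem: *const i32) -> (__m512i, __m512i) {
    let merged = _mm512_mask_loadu_epi32(src, 0x00FF, mem);
    let zeroed = _mm512_maskz_loadu_epi32(0x00FF, mem);
    (merged, zeroed)
}
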
33067/// Load packed 64-bit integers from memory into dst using writemask k
33068/// (elements are copied from src when the corresponding mask bit is not set).
33069/// mem_addr does not need to be aligned on any particular boundary.
33070///
33071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
33072#[inline]
33073#[target_feature(enable = "avx512f")]
33074#[cfg_attr(test, assert_instr(vmovdqu64))]
33075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33076pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
33077    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
33078}
33079
33080/// Load packed 64-bit integers from memory into dst using zeromask k
33081/// (elements are zeroed out when the corresponding mask bit is not set).
33082/// mem_addr does not need to be aligned on any particular boundary.
33083///
33084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
33085#[inline]
33086#[target_feature(enable = "avx512f")]
33087#[cfg_attr(test, assert_instr(vmovdqu64))]
33088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33089pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
33090    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
33091}
33092
33093/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33094/// (elements are copied from src when the corresponding mask bit is not set).
33095/// mem_addr does not need to be aligned on any particular boundary.
33096///
33097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
33098#[inline]
33099#[target_feature(enable = "avx512f")]
33100#[cfg_attr(test, assert_instr(vmovups))]
33101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33102pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
33103    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
33104}
33105
33106/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33107/// (elements are zeroed out when the corresponding mask bit is not set).
33108/// mem_addr does not need to be aligned on any particular boundary.
33109///
33110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
33111#[inline]
33112#[target_feature(enable = "avx512f")]
33113#[cfg_attr(test, assert_instr(vmovups))]
33114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33115pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
33116    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
33117}
33118
33119/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33120/// (elements are copied from src when the corresponding mask bit is not set).
33121/// mem_addr does not need to be aligned on any particular boundary.
33122///
33123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
33124#[inline]
33125#[target_feature(enable = "avx512f")]
33126#[cfg_attr(test, assert_instr(vmovupd))]
33127#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33128pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
33129    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
33130}
33131
33132/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33133/// (elements are zeroed out when the corresponding mask bit is not set).
33134/// mem_addr does not need to be aligned on any particular boundary.
33135///
33136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
33137#[inline]
33138#[target_feature(enable = "avx512f")]
33139#[cfg_attr(test, assert_instr(vmovupd))]
33140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33141pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
33142    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
33143}
33144
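// A minimal usage sketch (illustrative only; the helper name is hypothetical):
// a common use of the zero-masked unaligned load is handling the remainder of
// a slice whose length is not a multiple of the vector width. Masked-off lanes
// do not fault, so the access can stop at the end of the slice.
// Assumes AVX-512F support has been verified by the caller.
#[target_feature(enable = "avx512f")]
unsafe fn example_load_tail_pd(tail: &[f64]) -> __m512d {
    debug_assert!(tail.len() < 8);
    // Set one mask bit per remaining element, e.g. a length of 3 gives 0b0000_0111.
    let k: __mmask8 = (1u8 << tail.len()) - 1;
    _mm512_maskz_loadu_pd(k, tail.as_ptr())
}
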
33145/// Load packed 32-bit integers from memory into dst using writemask k
33146/// (elements are copied from src when the corresponding mask bit is not set).
33147/// mem_addr does not need to be aligned on any particular boundary.
33148///
33149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
33150#[inline]
33151#[target_feature(enable = "avx512f,avx512vl")]
33152#[cfg_attr(test, assert_instr(vmovdqu32))]
33153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33154pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
33155    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
33156}
33157
33158/// Load packed 32-bit integers from memory into dst using zeromask k
33159/// (elements are zeroed out when the corresponding mask bit is not set).
33160/// mem_addr does not need to be aligned on any particular boundary.
33161///
33162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
33163#[inline]
33164#[target_feature(enable = "avx512f,avx512vl")]
33165#[cfg_attr(test, assert_instr(vmovdqu32))]
33166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33167pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
33168    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
33169}
33170
33171/// Load packed 64-bit integers from memory into dst using writemask k
33172/// (elements are copied from src when the corresponding mask bit is not set).
33173/// mem_addr does not need to be aligned on any particular boundary.
33174///
33175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
33176#[inline]
33177#[target_feature(enable = "avx512f,avx512vl")]
33178#[cfg_attr(test, assert_instr(vmovdqu64))]
33179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33180pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
33181    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
33182}
33183
33184/// Load packed 64-bit integers from memory into dst using zeromask k
33185/// (elements are zeroed out when the corresponding mask bit is not set).
33186/// mem_addr does not need to be aligned on any particular boundary.
33187///
33188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
33189#[inline]
33190#[target_feature(enable = "avx512f,avx512vl")]
33191#[cfg_attr(test, assert_instr(vmovdqu64))]
33192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33193pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
33194    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
33195}
33196
33197/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33198/// (elements are copied from src when the corresponding mask bit is not set).
33199/// mem_addr does not need to be aligned on any particular boundary.
33200///
33201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
33202#[inline]
33203#[target_feature(enable = "avx512f,avx512vl")]
33204#[cfg_attr(test, assert_instr(vmovups))]
33205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33206pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
33207    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
33208}
33209
33210/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33211/// (elements are zeroed out when the corresponding mask bit is not set).
33212/// mem_addr does not need to be aligned on any particular boundary.
33213///
33214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
33215#[inline]
33216#[target_feature(enable = "avx512f,avx512vl")]
33217#[cfg_attr(test, assert_instr(vmovups))]
33218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33219pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
33220    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
33221}
33222
33223/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33224/// (elements are copied from src when the corresponding mask bit is not set).
33225/// mem_addr does not need to be aligned on any particular boundary.
33226///
33227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
33228#[inline]
33229#[target_feature(enable = "avx512f,avx512vl")]
33230#[cfg_attr(test, assert_instr(vmovupd))]
33231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33232pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
33233    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
33234}
33235
33236/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33237/// (elements are zeroed out when the corresponding mask bit is not set).
33238/// mem_addr does not need to be aligned on any particular boundary.
33239///
33240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
33241#[inline]
33242#[target_feature(enable = "avx512f,avx512vl")]
33243#[cfg_attr(test, assert_instr(vmovupd))]
33244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33245pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
33246    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
33247}
33248
33249/// Load packed 32-bit integers from memory into dst using writemask k
33250/// (elements are copied from src when the corresponding mask bit is not set).
33251/// mem_addr does not need to be aligned on any particular boundary.
33252///
33253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
33254#[inline]
33255#[target_feature(enable = "avx512f,avx512vl")]
33256#[cfg_attr(test, assert_instr(vmovdqu32))]
33257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33258pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
33259    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
33260}
33261
33262/// Load packed 32-bit integers from memory into dst using zeromask k
33263/// (elements are zeroed out when the corresponding mask bit is not set).
33264/// mem_addr does not need to be aligned on any particular boundary.
33265///
33266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
33267#[inline]
33268#[target_feature(enable = "avx512f,avx512vl")]
33269#[cfg_attr(test, assert_instr(vmovdqu32))]
33270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33271pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
33272    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
33273}
33274
33275/// Load packed 64-bit integers from memory into dst using writemask k
33276/// (elements are copied from src when the corresponding mask bit is not set).
33277/// mem_addr does not need to be aligned on any particular boundary.
33278///
33279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
33280#[inline]
33281#[target_feature(enable = "avx512f,avx512vl")]
33282#[cfg_attr(test, assert_instr(vmovdqu64))]
33283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33284pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
33285    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
33286}
33287
33288/// Load packed 64-bit integers from memory into dst using zeromask k
33289/// (elements are zeroed out when the corresponding mask bit is not set).
33290/// mem_addr does not need to be aligned on any particular boundary.
33291///
33292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
33293#[inline]
33294#[target_feature(enable = "avx512f,avx512vl")]
33295#[cfg_attr(test, assert_instr(vmovdqu64))]
33296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33297pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
33298    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
33299}
33300
33301/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33302/// (elements are copied from src when the corresponding mask bit is not set).
33303/// mem_addr does not need to be aligned on any particular boundary.
33304///
33305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
33306#[inline]
33307#[target_feature(enable = "avx512f,avx512vl")]
33308#[cfg_attr(test, assert_instr(vmovups))]
33309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33310pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
33311    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
33312}
33313
33314/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33315/// (elements are zeroed out when the corresponding mask bit is not set).
33316/// mem_addr does not need to be aligned on any particular boundary.
33317///
33318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
33319#[inline]
33320#[target_feature(enable = "avx512f,avx512vl")]
33321#[cfg_attr(test, assert_instr(vmovups))]
33322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33323pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
33324    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
33325}
33326
33327/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33328/// (elements are copied from src when the corresponding mask bit is not set).
33329/// mem_addr does not need to be aligned on any particular boundary.
33330///
33331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
33332#[inline]
33333#[target_feature(enable = "avx512f,avx512vl")]
33334#[cfg_attr(test, assert_instr(vmovupd))]
33335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33336pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
33337    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
33338}
33339
33340/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33341/// (elements are zeroed out when the corresponding mask bit is not set).
33342/// mem_addr does not need to be aligned on any particular boundary.
33343///
33344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
33345#[inline]
33346#[target_feature(enable = "avx512f,avx512vl")]
33347#[cfg_attr(test, assert_instr(vmovupd))]
33348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33349pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
33350    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
33351}
33352
33353/// Load packed 32-bit integers from memory into dst using writemask k
33354/// (elements are copied from src when the corresponding mask bit is not set).
33355/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33356///
33357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
33358#[inline]
33359#[target_feature(enable = "avx512f")]
33360#[cfg_attr(test, assert_instr(vmovdqa32))]
33361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33362pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
33363    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
33364}
33365
33366/// Load packed 32-bit integers from memory into dst using zeromask k
33367/// (elements are zeroed out when the corresponding mask bit is not set).
33368/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33369///
33370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
33371#[inline]
33372#[target_feature(enable = "avx512f")]
33373#[cfg_attr(test, assert_instr(vmovdqa32))]
33374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33375pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
33376    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
33377}
33378
33379/// Load packed 64-bit integers from memory into dst using writemask k
33380/// (elements are copied from src when the corresponding mask bit is not set).
33381/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33382///
33383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
33384#[inline]
33385#[target_feature(enable = "avx512f")]
33386#[cfg_attr(test, assert_instr(vmovdqa64))]
33387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33388pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
33389    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
33390}
33391
33392/// Load packed 64-bit integers from memory into dst using zeromask k
33393/// (elements are zeroed out when the corresponding mask bit is not set).
33394/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33395///
33396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
33397#[inline]
33398#[target_feature(enable = "avx512f")]
33399#[cfg_attr(test, assert_instr(vmovdqa64))]
33400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33401pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
33402    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
33403}
33404
33405/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33406/// (elements are copied from src when the corresponding mask bit is not set).
33407/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33408///
33409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
33410#[inline]
33411#[target_feature(enable = "avx512f")]
33412#[cfg_attr(test, assert_instr(vmovaps))]
33413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33414pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
33415    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
33416}
33417
33418/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33419/// (elements are zeroed out when the corresponding mask bit is not set).
33420/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33421///
33422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
33423#[inline]
33424#[target_feature(enable = "avx512f")]
33425#[cfg_attr(test, assert_instr(vmovaps))]
33426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33427pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
33428    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
33429}
33430
33431/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33432/// (elements are copied from src when the corresponding mask bit is not set).
33433/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33434///
33435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
33436#[inline]
33437#[target_feature(enable = "avx512f")]
33438#[cfg_attr(test, assert_instr(vmovapd))]
33439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33440pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
33441    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
33442}
33443
33444/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33445/// (elements are zeroed out when the corresponding mask bit is not set).
33446/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[cfg_attr(test, assert_instr(vmovapd))]
33452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33453pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
33454    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
33455}
33456
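// A minimal usage sketch (illustrative only; the type and helper names are
// hypothetical): the aligned masked loads combine the 64-byte alignment
// requirement with writemask merging.
// Assumes AVX-512F support has been verified by the caller.
#[repr(align(64))]
struct AlignedPd([f64; 8]);

#[target_feature(enable = "avx512f")]
unsafe fn example_masked_load_pd(src: __m512d, buf: &AlignedPd) -> __m512d {
    // Lanes 0-3 come from `buf`; lanes 4-7 are copied from `src`.
    _mm512_mask_load_pd(src, 0b0000_1111, buf.0.as_ptr())
}
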
33457/// Load packed 32-bit integers from memory into dst using writemask k
33458/// (elements are copied from src when the corresponding mask bit is not set).
33459/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33460///
33461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
33462#[inline]
33463#[target_feature(enable = "avx512f,avx512vl")]
33464#[cfg_attr(test, assert_instr(vmovdqa32))]
33465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33466pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
33467    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
33468}
33469
33470/// Load packed 32-bit integers from memory into dst using zeromask k
33471/// (elements are zeroed out when the corresponding mask bit is not set).
33472/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33473///
33474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
33475#[inline]
33476#[target_feature(enable = "avx512f,avx512vl")]
33477#[cfg_attr(test, assert_instr(vmovdqa32))]
33478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33479pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
33480    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
33481}
33482
33483/// Load packed 64-bit integers from memory into dst using writemask k
33484/// (elements are copied from src when the corresponding mask bit is not set).
33485/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33486///
33487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
33488#[inline]
33489#[target_feature(enable = "avx512f,avx512vl")]
33490#[cfg_attr(test, assert_instr(vmovdqa64))]
33491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33492pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
33493    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
33494}
33495
33496/// Load packed 64-bit integers from memory into dst using zeromask k
33497/// (elements are zeroed out when the corresponding mask bit is not set).
33498/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33499///
33500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
33501#[inline]
33502#[target_feature(enable = "avx512f,avx512vl")]
33503#[cfg_attr(test, assert_instr(vmovdqa64))]
33504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33505pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
33506    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
33507}
33508
33509/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33510/// (elements are copied from src when the corresponding mask bit is not set).
33511/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33512///
33513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
33514#[inline]
33515#[target_feature(enable = "avx512f,avx512vl")]
33516#[cfg_attr(test, assert_instr(vmovaps))]
33517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33518pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
33519    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
33520}
33521
33522/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33523/// (elements are zeroed out when the corresponding mask bit is not set).
33524/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33525///
33526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
33527#[inline]
33528#[target_feature(enable = "avx512f,avx512vl")]
33529#[cfg_attr(test, assert_instr(vmovaps))]
33530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33531pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
33532    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
33533}
33534
33535/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33536/// (elements are copied from src when the corresponding mask bit is not set).
33537/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33538///
33539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
33540#[inline]
33541#[target_feature(enable = "avx512f,avx512vl")]
33542#[cfg_attr(test, assert_instr(vmovapd))]
33543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33544pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
33545    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
33546}
33547
33548/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33549/// (elements are zeroed out when the corresponding mask bit is not set).
33550/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33551///
33552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
33553#[inline]
33554#[target_feature(enable = "avx512f,avx512vl")]
33555#[cfg_attr(test, assert_instr(vmovapd))]
33556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33557pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
33558    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
33559}
33560
33561/// Load packed 32-bit integers from memory into dst using writemask k
33562/// (elements are copied from src when the corresponding mask bit is not set).
33563/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33564///
33565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
33566#[inline]
33567#[target_feature(enable = "avx512f,avx512vl")]
33568#[cfg_attr(test, assert_instr(vmovdqa32))]
33569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33570pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
33571    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
33572}
33573
33574/// Load packed 32-bit integers from memory into dst using zeromask k
33575/// (elements are zeroed out when the corresponding mask bit is not set).
33576/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33577///
33578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
33579#[inline]
33580#[target_feature(enable = "avx512f,avx512vl")]
33581#[cfg_attr(test, assert_instr(vmovdqa32))]
33582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33583pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
33584    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
33585}
33586
33587/// Load packed 64-bit integers from memory into dst using writemask k
33588/// (elements are copied from src when the corresponding mask bit is not set).
33589/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33590///
33591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
33592#[inline]
33593#[target_feature(enable = "avx512f,avx512vl")]
33594#[cfg_attr(test, assert_instr(vmovdqa64))]
33595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33596pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
33597    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
33598}
33599
33600/// Load packed 64-bit integers from memory into dst using zeromask k
33601/// (elements are zeroed out when the corresponding mask bit is not set).
33602/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33603///
33604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
33605#[inline]
33606#[target_feature(enable = "avx512f,avx512vl")]
33607#[cfg_attr(test, assert_instr(vmovdqa64))]
33608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33609pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
33610    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
33611}
33612
33613/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
33614/// (elements are copied from src when the corresponding mask bit is not set).
33615/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33616///
33617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
33618#[inline]
33619#[target_feature(enable = "avx512f,avx512vl")]
33620#[cfg_attr(test, assert_instr(vmovaps))]
33621#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33622pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
33623    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
33624}
33625
33626/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
33627/// (elements are zeroed out when the corresponding mask bit is not set).
33628/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33629///
33630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
33631#[inline]
33632#[target_feature(enable = "avx512f,avx512vl")]
33633#[cfg_attr(test, assert_instr(vmovaps))]
33634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33635pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
33636    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
33637}
33638
33639/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
33640/// (elements are copied from src when the corresponding mask bit is not set).
33641/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33642///
33643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
33644#[inline]
33645#[target_feature(enable = "avx512f,avx512vl")]
33646#[cfg_attr(test, assert_instr(vmovapd))]
33647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33648pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
33649    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
33650}
33651
33652/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
33653/// (elements are zeroed out when the corresponding mask bit is not set).
33654/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33655///
33656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
33657#[inline]
33658#[target_feature(enable = "avx512f,avx512vl")]
33659#[cfg_attr(test, assert_instr(vmovapd))]
33660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33661pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
33662    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
33663}
33664
33665/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
33666/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
33667/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
33668/// exception may be generated.
33669///
33670/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
33671#[inline]
33672#[cfg_attr(test, assert_instr(vmovss))]
33673#[target_feature(enable = "avx512f")]
33674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33675pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
33676    let mut dst: __m128 = src;
33677    asm!(
33678        vpl!("vmovss {dst}{{{k}}}"),
33679        p = in(reg) mem_addr,
33680        k = in(kreg) k,
33681        dst = inout(xmm_reg) dst,
33682        options(pure, readonly, nostack, preserves_flags),
33683    );
33684    dst
33685}
33686
33687/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
33688/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
33689/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
33690/// exception may be generated.
33691///
33692/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
33693#[inline]
33694#[cfg_attr(test, assert_instr(vmovss))]
33695#[target_feature(enable = "avx512f")]
33696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33697pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
33698    let mut dst: __m128;
33699    asm!(
33700        vpl!("vmovss {dst}{{{k}}} {{z}}"),
33701        p = in(reg) mem_addr,
33702        k = in(kreg) k,
33703        dst = out(xmm_reg) dst,
33704        options(pure, readonly, nostack, preserves_flags),
33705    );
33706    dst
33707}
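
// Illustrative sketch (not part of the original source): the masked scalar
// loads only touch the lowest lane; lanes 1..=3 of the result are always
// zeroed, and the mask decides whether lane 0 comes from memory, from `src`,
// or is zeroed. The `_example_*` helper below is hypothetical. The `sd`
// variants that follow behave the same way on a single f64 lane.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_load_ss() {
    #[repr(align(16))]
    struct Aligned([f32; 4]);
    let mem = Aligned([5.0, 0.0, 0.0, 0.0]);
    let src = _mm_set1_ps(-1.0);
    // Mask bit 0 set: the value is read from memory.
    assert_eq!(_mm_cvtss_f32(_mm_mask_load_ss(src, 1, mem.0.as_ptr())), 5.0);
    // Mask bit 0 clear: the writemask form keeps `src`, the zeromask form gives 0.0.
    assert_eq!(_mm_cvtss_f32(_mm_mask_load_ss(src, 0, mem.0.as_ptr())), -1.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_load_ss(0, mem.0.as_ptr())), 0.0);
}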
33708
33709/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
33710/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
33711/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
33712/// exception may be generated.
33713///
33714/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
33715#[inline]
33716#[cfg_attr(test, assert_instr(vmovsd))]
33717#[target_feature(enable = "avx512f")]
33718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33719pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
33720    let mut dst: __m128d = src;
33721    asm!(
33722        vpl!("vmovsd {dst}{{{k}}}"),
33723        p = in(reg) mem_addr,
33724        k = in(kreg) k,
33725        dst = inout(xmm_reg) dst,
33726        options(pure, readonly, nostack, preserves_flags),
33727    );
33728    dst
33729}
33730
33731/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
33732/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
33733/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
33734/// may be generated.
33735///
33736/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
33737#[inline]
33738#[cfg_attr(test, assert_instr(vmovsd))]
33739#[target_feature(enable = "avx512f")]
33740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33741pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
33742    let mut dst: __m128d;
33743    asm!(
33744        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
33745        p = in(reg) mem_addr,
33746        k = in(kreg) k,
33747        dst = out(xmm_reg) dst,
33748        options(pure, readonly, nostack, preserves_flags),
33749    );
33750    dst
33751}
33752
33753/// Store packed 32-bit integers from a into memory using writemask k.
33754/// mem_addr does not need to be aligned on any particular boundary.
33755///
33756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
33757#[inline]
33758#[target_feature(enable = "avx512f")]
33759#[cfg_attr(test, assert_instr(vmovdqu32))]
33760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33761pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
33762    storedqu32_512(mem_addr, a.as_i32x16(), mask)
33763}
33764
33765/// Store packed 64-bit integers from a into memory using writemask k.
33766/// mem_addr does not need to be aligned on any particular boundary.
33767///
33768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
33769#[inline]
33770#[target_feature(enable = "avx512f")]
33771#[cfg_attr(test, assert_instr(vmovdqu64))]
33772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33773pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
33774    storedqu64_512(mem_addr, a.as_i64x8(), mask)
33775}
33776
33777/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33778/// mem_addr does not need to be aligned on any particular boundary.
33779///
33780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
33781#[inline]
33782#[target_feature(enable = "avx512f")]
33783#[cfg_attr(test, assert_instr(vmovups))]
33784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33785pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
33786    storeups_512(mem_addr, a.as_f32x16(), mask)
33787}
33788
33789/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33790/// mem_addr does not need to be aligned on any particular boundary.
33791///
33792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
33793#[inline]
33794#[target_feature(enable = "avx512f")]
33795#[cfg_attr(test, assert_instr(vmovupd))]
33796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33797pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
33798    storeupd_512(mem_addr, a.as_f64x8(), mask)
33799}
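
// Illustrative sketch (not part of the original source): a masked store writes
// only the lanes whose mask bit is set and leaves the rest of memory untouched.
// The hypothetical helper below checks that with `vmovdqu32` semantics.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_storeu_epi32() {
    let mut buf = [0i32; 16];
    let a = _mm512_set1_epi32(7);
    // Store only lanes 0 and 2; mem_addr does not need to be aligned.
    _mm512_mask_storeu_epi32(buf.as_mut_ptr(), 0b0000_0000_0000_0101, a);
    assert_eq!(buf, [7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
}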
33800
33801/// Store packed 32-bit integers from a into memory using writemask k.
33802/// mem_addr does not need to be aligned on any particular boundary.
33803///
33804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
33805#[inline]
33806#[target_feature(enable = "avx512f,avx512vl")]
33807#[cfg_attr(test, assert_instr(vmovdqu32))]
33808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33809pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
33810    storedqu32_256(mem_addr, a.as_i32x8(), mask)
33811}
33812
33813/// Store packed 64-bit integers from a into memory using writemask k.
33814/// mem_addr does not need to be aligned on any particular boundary.
33815///
33816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
33817#[inline]
33818#[target_feature(enable = "avx512f,avx512vl")]
33819#[cfg_attr(test, assert_instr(vmovdqu64))]
33820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33821pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
33822    storedqu64_256(mem_addr, a.as_i64x4(), mask)
33823}
33824
33825/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33826/// mem_addr does not need to be aligned on any particular boundary.
33827///
33828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
33829#[inline]
33830#[target_feature(enable = "avx512f,avx512vl")]
33831#[cfg_attr(test, assert_instr(vmovups))]
33832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33833pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
33834    storeups_256(mem_addr, a.as_f32x8(), mask)
33835}
33836
33837/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33838/// mem_addr does not need to be aligned on any particular boundary.
33839///
33840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
33841#[inline]
33842#[target_feature(enable = "avx512f,avx512vl")]
33843#[cfg_attr(test, assert_instr(vmovupd))]
33844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33845pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
33846    storeupd_256(mem_addr, a.as_f64x4(), mask)
33847}
33848
33849/// Store packed 32-bit integers from a into memory using writemask k.
33850/// mem_addr does not need to be aligned on any particular boundary.
33851///
33852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
33853#[inline]
33854#[target_feature(enable = "avx512f,avx512vl")]
33855#[cfg_attr(test, assert_instr(vmovdqu32))]
33856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33857pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
33858    storedqu32_128(mem_addr, a.as_i32x4(), mask)
33859}
33860
33861/// Store packed 64-bit integers from a into memory using writemask k.
33862/// mem_addr does not need to be aligned on any particular boundary.
33863///
33864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
33865#[inline]
33866#[target_feature(enable = "avx512f,avx512vl")]
33867#[cfg_attr(test, assert_instr(vmovdqu64))]
33868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33869pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
33870    storedqu64_128(mem_addr, a.as_i64x2(), mask)
33871}
33872
33873/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33874/// mem_addr does not need to be aligned on any particular boundary.
33875///
33876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
33877#[inline]
33878#[target_feature(enable = "avx512f,avx512vl")]
33879#[cfg_attr(test, assert_instr(vmovups))]
33880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33881pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
33882    storeups_128(mem_addr, a.as_f32x4(), mask)
33883}
33884
33885/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33886/// mem_addr does not need to be aligned on any particular boundary.
33887///
33888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
33889#[inline]
33890#[target_feature(enable = "avx512f,avx512vl")]
33891#[cfg_attr(test, assert_instr(vmovupd))]
33892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33893pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
33894    storeupd_128(mem_addr, a.as_f64x2(), mask)
33895}
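
// Illustrative sketch (not part of the original source): a typical use of the
// 128-bit masked stores is writing the final, partial chunk of a slice without
// touching memory past its end. The `_example_*` helper is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _example_store_partial_tail(dst: &mut [f32], v: __m128) {
    // Keep only as many lanes as the slice can hold (at most 4).
    let n = dst.len().min(4);
    let tail_mask: __mmask8 = (1u8 << n).wrapping_sub(1);
    _mm_mask_storeu_ps(dst.as_mut_ptr(), tail_mask, v);
}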
33896
33897/// Store packed 32-bit integers from a into memory using writemask k.
33898/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33899///
33900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
33901#[inline]
33902#[target_feature(enable = "avx512f")]
33903#[cfg_attr(test, assert_instr(vmovdqa32))]
33904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33905pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
33906    storedqa32_512(mem_addr, a.as_i32x16(), mask)
33907}
33908
33909/// Store packed 64-bit integers from a into memory using writemask k.
33910/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33911///
33912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
33913#[inline]
33914#[target_feature(enable = "avx512f")]
33915#[cfg_attr(test, assert_instr(vmovdqa64))]
33916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33917pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
33918    storedqa64_512(mem_addr, a.as_i64x8(), mask)
33919}
33920
33921/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33922/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33923///
33924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
33925#[inline]
33926#[target_feature(enable = "avx512f")]
33927#[cfg_attr(test, assert_instr(vmovaps))]
33928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33929pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
33930    storeaps_512(mem_addr, a.as_f32x16(), mask)
33931}
33932
33933/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33934/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
33935///
33936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
33937#[inline]
33938#[target_feature(enable = "avx512f")]
33939#[cfg_attr(test, assert_instr(vmovapd))]
33940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33941pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
33942    storeapd_512(mem_addr, a.as_f64x8(), mask)
33943}
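
// Illustrative sketch (not part of the original source): unlike the `storeu`
// family above, the aligned stores require the destination to sit on a 64-byte
// boundary, so the buffer is over-aligned explicitly in this hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_store_ps_aligned() {
    #[repr(align(64))]
    struct Aligned([f32; 16]);
    let mut mem = Aligned([0.0; 16]);
    let a = _mm512_set1_ps(1.5);
    // Write only the lower eight lanes; the upper half of the buffer stays 0.0.
    _mm512_mask_store_ps(mem.0.as_mut_ptr(), 0x00ff, a);
    assert_eq!(mem.0[7], 1.5);
    assert_eq!(mem.0[8], 0.0);
}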
33944
33945/// Store packed 32-bit integers from a into memory using writemask k.
33946/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33947///
33948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
33949#[inline]
33950#[target_feature(enable = "avx512f,avx512vl")]
33951#[cfg_attr(test, assert_instr(vmovdqa32))]
33952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33953pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
33954    storedqa32_256(mem_addr, a.as_i32x8(), mask)
33955}
33956
33957/// Store packed 64-bit integers from a into memory using writemask k.
33958/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33959///
33960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
33961#[inline]
33962#[target_feature(enable = "avx512f,avx512vl")]
33963#[cfg_attr(test, assert_instr(vmovdqa64))]
33964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33965pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
33966    storedqa64_256(mem_addr, a.as_i64x4(), mask)
33967}
33968
33969/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
33970/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33971///
33972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
33973#[inline]
33974#[target_feature(enable = "avx512f,avx512vl")]
33975#[cfg_attr(test, assert_instr(vmovaps))]
33976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33977pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
33978    storeaps_256(mem_addr, a.as_f32x8(), mask)
33979}
33980
33981/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
33982/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
33983///
33984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
33985#[inline]
33986#[target_feature(enable = "avx512f,avx512vl")]
33987#[cfg_attr(test, assert_instr(vmovapd))]
33988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33989pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
33990    storeapd_256(mem_addr, a.as_f64x4(), mask)
33991}
33992
33993/// Store packed 32-bit integers from a into memory using writemask k.
33994/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
33995///
33996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
33997#[inline]
33998#[target_feature(enable = "avx512f,avx512vl")]
33999#[cfg_attr(test, assert_instr(vmovdqa32))]
34000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34001pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
34002    storedqa32_128(mem_addr, a.as_i32x4(), mask)
34003}
34004
34005/// Store packed 64-bit integers from a into memory using writemask k.
34006/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34007///
34008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
34009#[inline]
34010#[target_feature(enable = "avx512f,avx512vl")]
34011#[cfg_attr(test, assert_instr(vmovdqa64))]
34012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34013pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
34014    storedqa64_128(mem_addr, a.as_i64x2(), mask)
34015}
34016
34017/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
34018/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34019///
34020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
34021#[inline]
34022#[target_feature(enable = "avx512f,avx512vl")]
34023#[cfg_attr(test, assert_instr(vmovaps))]
34024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34025pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
34026    storeaps_128(mem_addr, a.as_f32x4(), mask)
34027}
34028
34029/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
34030/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34031///
34032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
34033#[inline]
34034#[target_feature(enable = "avx512f,avx512vl")]
34035#[cfg_attr(test, assert_instr(vmovapd))]
34036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34037pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
34038    storeapd_128(mem_addr, a.as_f64x2(), mask)
34039}
34040
34041/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
34042/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34043///
34044/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
34045#[inline]
34046#[cfg_attr(test, assert_instr(vmovss))]
34047#[target_feature(enable = "avx512f")]
34048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34049pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
34050    asm!(
34051        vps!("vmovss", "{{{k}}}, {a}"),
34052        p = in(reg) mem_addr,
34053        k = in(kreg) k,
34054        a = in(xmm_reg) a,
34055        options(nostack, preserves_flags),
34056    );
34057}
34058
34059/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
34060/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34061///
34062/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
34063#[inline]
34064#[cfg_attr(test, assert_instr(vmovsd))]
34065#[target_feature(enable = "avx512f")]
34066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34067pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
34068    asm!(
34069        vps!("vmovsd", "{{{k}}}, {a}"),
34070        p = in(reg) mem_addr,
34071        k = in(kreg) k,
34072        a = in(xmm_reg) a,
34073        options(nostack, preserves_flags),
34074    );
34075}
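
// Illustrative sketch (not part of the original source): the masked scalar
// stores write a single element, and only when mask bit 0 is set, so they can
// express a branch-free "store if" on the lowest lane. Hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_store_ss() {
    #[repr(align(16))]
    struct Aligned([f32; 4]);
    let mut mem = Aligned([0.0; 4]);
    _mm_mask_store_ss(mem.0.as_mut_ptr(), 1, _mm_set1_ps(3.0)); // bit 0 set: 3.0 is written
    _mm_mask_store_ss(mem.0.as_mut_ptr(), 0, _mm_set1_ps(9.0)); // bit 0 clear: nothing is written
    assert_eq!(mem.0[0], 3.0);
}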
34076
34077/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34078///
34079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
34080#[inline]
34081#[target_feature(enable = "avx512f")]
34082#[cfg_attr(test, assert_instr(vpexpandd))]
34083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34084pub unsafe fn _mm512_mask_expandloadu_epi32(
34085    src: __m512i,
34086    k: __mmask16,
34087    mem_addr: *const i32,
34088) -> __m512i {
34089    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
34090}
34091
34092/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34093///
34094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
34095#[inline]
34096#[target_feature(enable = "avx512f")]
34097#[cfg_attr(test, assert_instr(vpexpandd))]
34098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34099pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
34100    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34101}
34102
34103/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34104///
34105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
34106#[inline]
34107#[target_feature(enable = "avx512f,avx512vl")]
34108#[cfg_attr(test, assert_instr(vpexpandd))]
34109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34110pub unsafe fn _mm256_mask_expandloadu_epi32(
34111    src: __m256i,
34112    k: __mmask8,
34113    mem_addr: *const i32,
34114) -> __m256i {
34115    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
34116}
34117
34118/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34119///
34120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
34121#[inline]
34122#[target_feature(enable = "avx512f,avx512vl")]
34123#[cfg_attr(test, assert_instr(vpexpandd))]
34124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34125pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
34126    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34127}
34128
34129/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34130///
34131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
34132#[inline]
34133#[target_feature(enable = "avx512f,avx512vl")]
34134#[cfg_attr(test, assert_instr(vpexpandd))]
34135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34136pub unsafe fn _mm_mask_expandloadu_epi32(
34137    src: __m128i,
34138    k: __mmask8,
34139    mem_addr: *const i32,
34140) -> __m128i {
34141    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
34142}
34143
34144/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34145///
34146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
34147#[inline]
34148#[target_feature(enable = "avx512f,avx512vl")]
34149#[cfg_attr(test, assert_instr(vpexpandd))]
34150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34151pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
34152    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
34153}
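
// Illustrative sketch (not part of the original source): an expand-load reads
// elements *contiguously* from memory (one per set mask bit, in ascending lane
// order) and scatters them into the destination lanes selected by the mask.
// The `_example_*` helper below is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _example_expandloadu_epi32() {
    let mem = [10i32, 20, 30, 40];
    // Mask 0b1010 selects lanes 1 and 3, which receive mem[0] and mem[1];
    // the unselected lanes are zeroed by the maskz form.
    let r = _mm_maskz_expandloadu_epi32(0b1010, mem.as_ptr());
    let lanes: [i32; 4] = core::mem::transmute(r);
    assert_eq!(lanes, [0, 10, 0, 20]);
}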
34154
34155/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34156///
34157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
34158#[inline]
34159#[target_feature(enable = "avx512f")]
34160#[cfg_attr(test, assert_instr(vpexpandq))]
34161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34162pub unsafe fn _mm512_mask_expandloadu_epi64(
34163    src: __m512i,
34164    k: __mmask8,
34165    mem_addr: *const i64,
34166) -> __m512i {
34167    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
34168}
34169
34170/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34171///
34172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
34173#[inline]
34174#[target_feature(enable = "avx512f")]
34175#[cfg_attr(test, assert_instr(vpexpandq))]
34176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34177pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
34178    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34179}
34180
34181/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34182///
34183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
34184#[inline]
34185#[target_feature(enable = "avx512f,avx512vl")]
34186#[cfg_attr(test, assert_instr(vpexpandq))]
34187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34188pub unsafe fn _mm256_mask_expandloadu_epi64(
34189    src: __m256i,
34190    k: __mmask8,
34191    mem_addr: *const i64,
34192) -> __m256i {
34193    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
34194}
34195
34196/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34197///
34198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
34199#[inline]
34200#[target_feature(enable = "avx512f,avx512vl")]
34201#[cfg_attr(test, assert_instr(vpexpandq))]
34202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34203pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
34204    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34205}
34206
34207/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34208///
34209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
34210#[inline]
34211#[target_feature(enable = "avx512f,avx512vl")]
34212#[cfg_attr(test, assert_instr(vpexpandq))]
34213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34214pub unsafe fn _mm_mask_expandloadu_epi64(
34215    src: __m128i,
34216    k: __mmask8,
34217    mem_addr: *const i64,
34218) -> __m128i {
34219    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
34220}
34221
34222/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34223///
34224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
34225#[inline]
34226#[target_feature(enable = "avx512f,avx512vl")]
34227#[cfg_attr(test, assert_instr(vpexpandq))]
34228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34229pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
34230    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
34231}
34232
34233/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34234///
34235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
34236#[inline]
34237#[target_feature(enable = "avx512f")]
34238#[cfg_attr(test, assert_instr(vexpandps))]
34239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34240pub unsafe fn _mm512_mask_expandloadu_ps(
34241    src: __m512,
34242    k: __mmask16,
34243    mem_addr: *const f32,
34244) -> __m512 {
34245    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
34246}
34247
34248/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34249///
34250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
34251#[inline]
34252#[target_feature(enable = "avx512f")]
34253#[cfg_attr(test, assert_instr(vexpandps))]
34254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34255pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
34256    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
34257}
34258
34259/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34260///
34261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
34262#[inline]
34263#[target_feature(enable = "avx512f,avx512vl")]
34264#[cfg_attr(test, assert_instr(vexpandps))]
34265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34266pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
34267    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
34268}
34269
34270/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34271///
34272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
34273#[inline]
34274#[target_feature(enable = "avx512f,avx512vl")]
34275#[cfg_attr(test, assert_instr(vexpandps))]
34276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34277pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
34278    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
34279}
34280
34281/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34282///
34283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
34284#[inline]
34285#[target_feature(enable = "avx512f,avx512vl")]
34286#[cfg_attr(test, assert_instr(vexpandps))]
34287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34288pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
34289    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
34290}
34291
34292/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34293///
34294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
34295#[inline]
34296#[target_feature(enable = "avx512f,avx512vl")]
34297#[cfg_attr(test, assert_instr(vexpandps))]
34298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34299pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
34300    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
34301}
34302
34303/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34304///
34305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
34306#[inline]
34307#[target_feature(enable = "avx512f")]
34308#[cfg_attr(test, assert_instr(vexpandpd))]
34309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34310pub unsafe fn _mm512_mask_expandloadu_pd(
34311    src: __m512d,
34312    k: __mmask8,
34313    mem_addr: *const f64,
34314) -> __m512d {
34315    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
34316}
34317
34318/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34319///
34320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
34321#[inline]
34322#[target_feature(enable = "avx512f")]
34323#[cfg_attr(test, assert_instr(vexpandpd))]
34324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34325pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
34326    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
34327}
34328
34329/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34330///
34331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
34332#[inline]
34333#[target_feature(enable = "avx512f,avx512vl")]
34334#[cfg_attr(test, assert_instr(vexpandpd))]
34335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34336pub unsafe fn _mm256_mask_expandloadu_pd(
34337    src: __m256d,
34338    k: __mmask8,
34339    mem_addr: *const f64,
34340) -> __m256d {
34341    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
34342}
34343
34344/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34345///
34346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
34347#[inline]
34348#[target_feature(enable = "avx512f,avx512vl")]
34349#[cfg_attr(test, assert_instr(vexpandpd))]
34350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34351pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
34352    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
34353}
34354
34355/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
34356///
34357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
34358#[inline]
34359#[target_feature(enable = "avx512f,avx512vl")]
34360#[cfg_attr(test, assert_instr(vexpandpd))]
34361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34362pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
34363    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
34364}
34365
34366/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
34367///
34368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
34369#[inline]
34370#[target_feature(enable = "avx512f,avx512vl")]
34371#[cfg_attr(test, assert_instr(vexpandpd))]
34372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34373pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
34374    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
34375}
34376
34377/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
34378///
34379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
34380#[inline]
34381#[target_feature(enable = "avx512f")]
34382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34383pub unsafe fn _mm512_setr_pd(
34384    e0: f64,
34385    e1: f64,
34386    e2: f64,
34387    e3: f64,
34388    e4: f64,
34389    e5: f64,
34390    e6: f64,
34391    e7: f64,
34392) -> __m512d {
34393    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
34394    transmute(r)
34395}
34396
34397/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
34398///
34399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
34400#[inline]
34401#[target_feature(enable = "avx512f")]
34402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34403pub unsafe fn _mm512_set_pd(
34404    e0: f64,
34405    e1: f64,
34406    e2: f64,
34407    e3: f64,
34408    e4: f64,
34409    e5: f64,
34410    e6: f64,
34411    e7: f64,
34412) -> __m512d {
34413    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
34414}
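
// Illustrative sketch (not part of the original source): `_mm512_set_pd` lists
// elements from the highest lane down to lane 0, while `_mm512_setr_pd` lists
// them in lane (memory) order, so the two calls below build the same vector.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_set_vs_setr_pd() {
    let a = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
    let b = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let lane0: f64 = simd_extract!(a, 0);
    let lane7: f64 = simd_extract!(b, 7);
    assert_eq!(lane0, 0.0);
    assert_eq!(lane7, 7.0);
}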
34415
34416/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34417///
34418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_ss&expand=3832)
34419#[inline]
34420#[target_feature(enable = "avx512f")]
34421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34422#[cfg_attr(test, assert_instr(vmovss))]
34423pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34424    let extractsrc: f32 = simd_extract!(src, 0);
34425    let mut mov: f32 = extractsrc;
34426    if (k & 0b00000001) != 0 {
34427        mov = simd_extract!(b, 0);
34428    }
34429    simd_insert!(a, 0, mov)
34430}
34431
34432/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34433///
34434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_ss&expand=3833)
34435#[inline]
34436#[target_feature(enable = "avx512f")]
34437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34438#[cfg_attr(test, assert_instr(vmovss))]
34439pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34440    let mut mov: f32 = 0.;
34441    if (k & 0b00000001) != 0 {
34442        mov = simd_extract!(b, 0);
34443    }
34444    simd_insert!(a, 0, mov)
34445}
34446
34447/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34448///
34449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_sd&expand=3829)
34450#[inline]
34451#[target_feature(enable = "avx512f")]
34452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34453#[cfg_attr(test, assert_instr(vmovsd))]
34454pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34455    let extractsrc: f64 = simd_extract!(src, 0);
34456    let mut mov: f64 = extractsrc;
34457    if (k & 0b00000001) != 0 {
34458        mov = simd_extract!(b, 0);
34459    }
34460    simd_insert!(a, 0, mov)
34461}
34462
34463/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34464///
34465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_sd&expand=3830)
34466#[inline]
34467#[target_feature(enable = "avx512f")]
34468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34469#[cfg_attr(test, assert_instr(vmovsd))]
34470pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34471    let mut mov: f64 = 0.;
34472    if (k & 0b00000001) != 0 {
34473        mov = simd_extract!(b, 0);
34474    }
34475    simd_insert!(a, 0, mov)
34476}
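
// Illustrative sketch (not part of the original source): the masked moves are a
// lane-0 select. When the mask bit is set the low element comes from `b`,
// otherwise from `src` (or 0.0 for the zeromask form); the upper lanes always
// come from `a`. Hypothetical helper for the `ss` pair above.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_move_ss() {
    let src = _mm_set1_ps(-1.0);
    let a = _mm_set1_ps(2.0);
    let b = _mm_set1_ps(9.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 1, a, b)), 9.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0, a, b)), -1.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_move_ss(0, a, b)), 0.0);
    let upper: f32 = simd_extract!(_mm_mask_move_ss(src, 0, a, b), 1);
    assert_eq!(upper, 2.0);
}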
34477
34478/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34479///
34480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ss&expand=159)
34481#[inline]
34482#[target_feature(enable = "avx512f")]
34483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34484#[cfg_attr(test, assert_instr(vaddss))]
34485pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34486    let extractsrc: f32 = simd_extract!(src, 0);
34487    let mut add: f32 = extractsrc;
34488    if (k & 0b00000001) != 0 {
34489        let extracta: f32 = simd_extract!(a, 0);
34490        let extractb: f32 = simd_extract!(b, 0);
34491        add = extracta + extractb;
34492    }
34493    simd_insert!(a, 0, add)
34494}
34495
34496/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34497///
34498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ss&expand=160)
34499#[inline]
34500#[target_feature(enable = "avx512f")]
34501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34502#[cfg_attr(test, assert_instr(vaddss))]
34503pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34504    let mut add: f32 = 0.;
34505    if (k & 0b00000001) != 0 {
34506        let extracta: f32 = simd_extract!(a, 0);
34507        let extractb: f32 = simd_extract!(b, 0);
34508        add = extracta + extractb;
34509    }
34510    simd_insert!(a, 0, add)
34511}
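
// Illustrative sketch (not part of the original source): the masked scalar
// arithmetic intrinsics (add/sub/mul/div, ss/sd) all follow the same pattern:
// only lane 0 is computed, and the mask chooses between the computed value,
// `src`, or zero for that lane. Hypothetical helper for the `add_ss` pair above.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_add_ss() {
    let src = _mm_set1_ps(-1.0);
    let a = _mm_set1_ps(2.0);
    let b = _mm_set1_ps(3.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 1, a, b)), 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0, a, b)), -1.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0, a, b)), 0.0);
}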
34512
34513/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34514///
34515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_sd&expand=155)
34516#[inline]
34517#[target_feature(enable = "avx512f")]
34518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34519#[cfg_attr(test, assert_instr(vaddsd))]
34520pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34521    let extractsrc: f64 = simd_extract!(src, 0);
34522    let mut add: f64 = extractsrc;
34523    if (k & 0b00000001) != 0 {
34524        let extracta: f64 = simd_extract!(a, 0);
34525        let extractb: f64 = simd_extract!(b, 0);
34526        add = extracta + extractb;
34527    }
34528    simd_insert!(a, 0, add)
34529}
34530
34531/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34532///
34533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_sd&expand=156)
34534#[inline]
34535#[target_feature(enable = "avx512f")]
34536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34537#[cfg_attr(test, assert_instr(vaddsd))]
34538pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34539    let mut add: f64 = 0.;
34540    if (k & 0b00000001) != 0 {
34541        let extracta: f64 = simd_extract!(a, 0);
34542        let extractb: f64 = simd_extract!(b, 0);
34543        add = extracta + extractb;
34544    }
34545    simd_insert!(a, 0, add)
34546}
34547
34548/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34549///
34550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ss&expand=5750)
34551#[inline]
34552#[target_feature(enable = "avx512f")]
34553#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34554#[cfg_attr(test, assert_instr(vsubss))]
34555pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34556    let extractsrc: f32 = simd_extract!(src, 0);
34557    let mut sub: f32 = extractsrc;
34558    if (k & 0b00000001) != 0 {
34559        let extracta: f32 = simd_extract!(a, 0);
34560        let extractb: f32 = simd_extract!(b, 0);
34561        sub = extracta - extractb;
34562    }
34563    simd_insert!(a, 0, sub)
34564}
34565
34566/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34567///
34568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ss&expand=5751)
34569#[inline]
34570#[target_feature(enable = "avx512f")]
34571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34572#[cfg_attr(test, assert_instr(vsubss))]
34573pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34574    let mut sub: f32 = 0.;
34575    if (k & 0b00000001) != 0 {
34576        let extracta: f32 = simd_extract!(a, 0);
34577        let extractb: f32 = simd_extract!(b, 0);
34578        sub = extracta - extractb;
34579    }
34580    simd_insert!(a, 0, sub)
34581}
34582
34583/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34584///
34585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_sd&expand=5746)
34586#[inline]
34587#[target_feature(enable = "avx512f")]
34588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34589#[cfg_attr(test, assert_instr(vsubsd))]
34590pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34591    let extractsrc: f64 = simd_extract!(src, 0);
34592    let mut sub: f64 = extractsrc;
34593    if (k & 0b00000001) != 0 {
34594        let extracta: f64 = simd_extract!(a, 0);
34595        let extractb: f64 = simd_extract!(b, 0);
34596        sub = extracta - extractb;
34597    }
34598    simd_insert!(a, 0, sub)
34599}
34600
34601/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34602///
34603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_sd&expand=5747)
34604#[inline]
34605#[target_feature(enable = "avx512f")]
34606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34607#[cfg_attr(test, assert_instr(vsubsd))]
34608pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34609    let mut sub: f64 = 0.;
34610    if (k & 0b00000001) != 0 {
34611        let extracta: f64 = simd_extract!(a, 0);
34612        let extractb: f64 = simd_extract!(b, 0);
34613        sub = extracta - extractb;
34614    }
34615    simd_insert!(a, 0, sub)
34616}
34617
34618/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34619///
34620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ss&expand=3950)
34621#[inline]
34622#[target_feature(enable = "avx512f")]
34623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34624#[cfg_attr(test, assert_instr(vmulss))]
34625pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34626    let extractsrc: f32 = simd_extract!(src, 0);
34627    let mut mul: f32 = extractsrc;
34628    if (k & 0b00000001) != 0 {
34629        let extracta: f32 = simd_extract!(a, 0);
34630        let extractb: f32 = simd_extract!(b, 0);
34631        mul = extracta * extractb;
34632    }
34633    simd_insert!(a, 0, mul)
34634}
34635
34636/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34637///
34638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ss&expand=3951)
34639#[inline]
34640#[target_feature(enable = "avx512f")]
34641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34642#[cfg_attr(test, assert_instr(vmulss))]
34643pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34644    let mut mul: f32 = 0.;
34645    if (k & 0b00000001) != 0 {
34646        let extracta: f32 = simd_extract!(a, 0);
34647        let extractb: f32 = simd_extract!(b, 0);
34648        mul = extracta * extractb;
34649    }
34650    simd_insert!(a, 0, mul)
34651}
34652
34653/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34654///
34655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_sd&expand=3947)
34656#[inline]
34657#[target_feature(enable = "avx512f")]
34658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34659#[cfg_attr(test, assert_instr(vmulsd))]
34660pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34661    let extractsrc: f64 = simd_extract!(src, 0);
34662    let mut mul: f64 = extractsrc;
34663    if (k & 0b00000001) != 0 {
34664        let extracta: f64 = simd_extract!(a, 0);
34665        let extractb: f64 = simd_extract!(b, 0);
34666        mul = extracta * extractb;
34667    }
34668    simd_insert!(a, 0, mul)
34669}
34670
34671/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34672///
34673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_sd&expand=3948)
34674#[inline]
34675#[target_feature(enable = "avx512f")]
34676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34677#[cfg_attr(test, assert_instr(vmulsd))]
34678pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34679    let mut mul: f64 = 0.;
34680    if (k & 0b00000001) != 0 {
34681        let extracta: f64 = simd_extract!(a, 0);
34682        let extractb: f64 = simd_extract!(b, 0);
34683        mul = extracta * extractb;
34684    }
34685    simd_insert!(a, 0, mul)
34686}
34687
34688/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34689///
34690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ss&expand=2181)
34691#[inline]
34692#[target_feature(enable = "avx512f")]
34693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34694#[cfg_attr(test, assert_instr(vdivss))]
34695pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34696    let extractsrc: f32 = simd_extract!(src, 0);
34697    let mut div: f32 = extractsrc;
34698    if (k & 0b00000001) != 0 {
34699        let extracta: f32 = simd_extract!(a, 0);
34700        let extractb: f32 = simd_extract!(b, 0);
34701        div = extracta / extractb;
34702    }
34703    simd_insert!(a, 0, div)
34704}
34705
34706/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34707///
34708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
34709#[inline]
34710#[target_feature(enable = "avx512f")]
34711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34712#[cfg_attr(test, assert_instr(vdivss))]
34713pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34714    let mut div: f32 = 0.;
34715    if (k & 0b00000001) != 0 {
34716        let extracta: f32 = simd_extract!(a, 0);
34717        let extractb: f32 = simd_extract!(b, 0);
34718        div = extracta / extractb;
34719    }
34720    simd_insert!(a, 0, div)
34721}
34722
34723/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34724///
34725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
34726#[inline]
34727#[target_feature(enable = "avx512f")]
34728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34729#[cfg_attr(test, assert_instr(vdivsd))]
34730pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34731    let extractsrc: f64 = simd_extract!(src, 0);
34732    let mut div: f64 = extractsrc;
34733    if (k & 0b00000001) != 0 {
34734        let extracta: f64 = simd_extract!(a, 0);
34735        let extractb: f64 = simd_extract!(b, 0);
34736        div = extracta / extractb;
34737    }
34738    simd_insert!(a, 0, div)
34739}
34740
34741/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34742///
34743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
34744#[inline]
34745#[target_feature(enable = "avx512f")]
34746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34747#[cfg_attr(test, assert_instr(vdivsd))]
34748pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34749    let mut div: f64 = 0.;
34750    if (k & 0b00000001) != 0 {
34751        let extracta: f64 = simd_extract!(a, 0);
34752        let extractb: f64 = simd_extract!(b, 0);
34753        div = extracta / extractb;
34754    }
34755    simd_insert!(a, 0, div)
34756}
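
// Illustrative sketch added in this edit (not upstream code): masked scalar division using the
// double-precision intrinsics above; the helper name is invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_scalar_div() {
    let src = _mm_set_sd(7.0);
    let (a, b) = (_mm_set_sd(1.0), _mm_set_sd(4.0));
    // Mask bit 0 set: lower lane = 1.0 / 4.0.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_div_sd(src, 0b1, a, b)), 0.25);
    // Mask bit 0 clear: copy from `src` (writemask) or zero (zeromask).
    assert_eq!(_mm_cvtsd_f64(_mm_mask_div_sd(src, 0b0, a, b)), 7.0);
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_div_sd(0b0, a, b)), 0.0);
}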
34757
34758/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34759///
34760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
34761#[inline]
34762#[target_feature(enable = "avx512f")]
34763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34764#[cfg_attr(test, assert_instr(vmaxss))]
34765pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34766    transmute(vmaxss(
34767        a.as_f32x4(),
34768        b.as_f32x4(),
34769        src.as_f32x4(),
34770        k,
34771        _MM_FROUND_CUR_DIRECTION,
34772    ))
34773}
34774
34775/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34776///
34777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
34778#[inline]
34779#[target_feature(enable = "avx512f")]
34780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34781#[cfg_attr(test, assert_instr(vmaxss))]
34782pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34783    transmute(vmaxss(
34784        a.as_f32x4(),
34785        b.as_f32x4(),
34786        f32x4::ZERO,
34787        k,
34788        _MM_FROUND_CUR_DIRECTION,
34789    ))
34790}
34791
34792/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34798#[cfg_attr(test, assert_instr(vmaxsd))]
34799pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34800    transmute(vmaxsd(
34801        a.as_f64x2(),
34802        b.as_f64x2(),
34803        src.as_f64x2(),
34804        k,
34805        _MM_FROUND_CUR_DIRECTION,
34806    ))
34807}
34808
34809/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34810///
34811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
34812#[inline]
34813#[target_feature(enable = "avx512f")]
34814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34815#[cfg_attr(test, assert_instr(vmaxsd))]
34816pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34817    transmute(vmaxsd(
34818        a.as_f64x2(),
34819        b.as_f64x2(),
34820        f64x2::ZERO,
34821        k,
34822        _MM_FROUND_CUR_DIRECTION,
34823    ))
34824}
34825
34826/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34827///
34828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
34829#[inline]
34830#[target_feature(enable = "avx512f")]
34831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34832#[cfg_attr(test, assert_instr(vminss))]
34833pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34834    transmute(vminss(
34835        a.as_f32x4(),
34836        b.as_f32x4(),
34837        src.as_f32x4(),
34838        k,
34839        _MM_FROUND_CUR_DIRECTION,
34840    ))
34841}
34842
34843/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34844///
34845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
34846#[inline]
34847#[target_feature(enable = "avx512f")]
34848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34849#[cfg_attr(test, assert_instr(vminss))]
34850pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34851    transmute(vminss(
34852        a.as_f32x4(),
34853        b.as_f32x4(),
34854        f32x4::ZERO,
34855        k,
34856        _MM_FROUND_CUR_DIRECTION,
34857    ))
34858}
34859
34860/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34861///
34862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
34863#[inline]
34864#[target_feature(enable = "avx512f")]
34865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34866#[cfg_attr(test, assert_instr(vminsd))]
34867pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34868    transmute(vminsd(
34869        a.as_f64x2(),
34870        b.as_f64x2(),
34871        src.as_f64x2(),
34872        k,
34873        _MM_FROUND_CUR_DIRECTION,
34874    ))
34875}
34876
34877/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34878///
34879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
34880#[inline]
34881#[target_feature(enable = "avx512f")]
34882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34883#[cfg_attr(test, assert_instr(vminsd))]
34884pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34885    transmute(vminsd(
34886        a.as_f64x2(),
34887        b.as_f64x2(),
34888        f64x2::ZERO,
34889        k,
34890        _MM_FROUND_CUR_DIRECTION,
34891    ))
34892}
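
// Illustrative sketch added in this edit (not upstream code): masked scalar max/min as exposed
// by the intrinsics above; the helper name is invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_scalar_max_min() {
    let src = _mm_set_ss(1.0);
    let (a, b) = (_mm_set_ss(3.0), _mm_set_ss(7.0));
    // Mask bit 0 set: lower lane = max(3.0, 7.0).
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b1, a, b)), 7.0);
    // Mask bit 0 clear: lower lane is taken from `src`.
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b0, a, b)), 1.0);
    // Zero-masked double-precision minimum with the mask bit set.
    let (c, d) = (_mm_set_sd(3.0), _mm_set_sd(7.0));
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_min_sd(0b1, c, d)), 3.0);
}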
34893
34894/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34895///
34896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
34897#[inline]
34898#[target_feature(enable = "avx512f")]
34899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34900#[cfg_attr(test, assert_instr(vsqrtss))]
34901pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34902    vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
34903}
34904
34905/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
34906///
34907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
34908#[inline]
34909#[target_feature(enable = "avx512f")]
34910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34911#[cfg_attr(test, assert_instr(vsqrtss))]
34912pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34913    vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION)
34914}
34915
34916/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34917///
34918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
34919#[inline]
34920#[target_feature(enable = "avx512f")]
34921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34922#[cfg_attr(test, assert_instr(vsqrtsd))]
34923pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34924    vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
34925}
34926
34927/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
34928///
34929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
34930#[inline]
34931#[target_feature(enable = "avx512f")]
34932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34933#[cfg_attr(test, assert_instr(vsqrtsd))]
34934pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34935    vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION)
34936}
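
// Illustrative sketch added in this edit (not upstream code): the masked scalar square-root
// intrinsics above take the operand from the lower lane of `b` and the upper lanes from `a`;
// the helper name is invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_scalar_sqrt() {
    let src = _mm_set_ss(5.0);
    let (a, b) = (_mm_set_ss(2.0), _mm_set_ss(16.0));
    // Mask bit 0 set: lower lane = sqrt(16.0).
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b1, a, b)), 4.0);
    // Mask bit 0 clear: lower lane copied from `src`.
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b0, a, b)), 5.0);
    // Zero-masked double-precision variant with the mask bit clear.
    let (c, d) = (_mm_set_sd(2.0), _mm_set_sd(9.0));
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_sqrt_sd(0b0, c, d)), 0.0);
}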
34937
34938/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
34939///
34940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
34941#[inline]
34942#[target_feature(enable = "avx512f")]
34943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34944#[cfg_attr(test, assert_instr(vrsqrt14ss))]
34945pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
34946    transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1))
34947}
34948
34949/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
34950///
34951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
34952#[inline]
34953#[target_feature(enable = "avx512f")]
34954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34955#[cfg_attr(test, assert_instr(vrsqrt14ss))]
34956pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34957    transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
34958}
34959
34960/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
34961///
34962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
34963#[inline]
34964#[target_feature(enable = "avx512f")]
34965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34966#[cfg_attr(test, assert_instr(vrsqrt14ss))]
34967pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34968    transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k))
34969}
34970
34971/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
34972///
34973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
34974#[inline]
34975#[target_feature(enable = "avx512f")]
34976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34977#[cfg_attr(test, assert_instr(vrsqrt14sd))]
34978pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
34979    transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1))
34980}
34981
34982/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
34983///
34984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
34985#[inline]
34986#[target_feature(enable = "avx512f")]
34987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34988#[cfg_attr(test, assert_instr(vrsqrt14sd))]
34989pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34990    transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
34991}
34992
34993/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
34994///
34995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
34996#[inline]
34997#[target_feature(enable = "avx512f")]
34998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34999#[cfg_attr(test, assert_instr(vrsqrt14sd))]
35000pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
35001    transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k))
35002}
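
// Illustrative sketch added in this edit (not upstream code): `rsqrt14` is an approximation,
// so the result is only checked against a loose tolerance consistent with the documented
// 2^-14 relative error; the helper name is invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_rsqrt14_scalar() {
    let (a, b) = (_mm_set_ss(1.0), _mm_set_ss(4.0));
    // Lower lane is approximately 1 / sqrt(4.0) = 0.5; upper lanes come from `a`.
    let r = _mm_cvtss_f32(_mm_rsqrt14_ss(a, b));
    assert!(r > 0.4999 && r < 0.5001);
    // Zero-masked form with the mask bit clear zeroes the lower lane.
    assert_eq!(_mm_cvtss_f32(_mm_maskz_rsqrt14_ss(0b0, a, b)), 0.0);
}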
35003
35004/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35005///
35006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
35007#[inline]
35008#[target_feature(enable = "avx512f")]
35009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35010#[cfg_attr(test, assert_instr(vrcp14ss))]
35011pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
35012    transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1))
35013}
35014
35015/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35016///
35017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
35018#[inline]
35019#[target_feature(enable = "avx512f")]
35020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35021#[cfg_attr(test, assert_instr(vrcp14ss))]
35022pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
35023    transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
35024}
35025
35026/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
35027///
35028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
35029#[inline]
35030#[target_feature(enable = "avx512f")]
35031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35032#[cfg_attr(test, assert_instr(vrcp14ss))]
35033pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
35034    transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k))
35035}
35036
35037/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35038///
35039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
35040#[inline]
35041#[target_feature(enable = "avx512f")]
35042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35043#[cfg_attr(test, assert_instr(vrcp14sd))]
35044pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
35045    transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1))
35046}
35047
35048/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35049///
35050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
35051#[inline]
35052#[target_feature(enable = "avx512f")]
35053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35054#[cfg_attr(test, assert_instr(vrcp14sd))]
35055pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
35056    transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
35057}
35058
35059/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
35060///
35061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
35062#[inline]
35063#[target_feature(enable = "avx512f")]
35064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35065#[cfg_attr(test, assert_instr(vrcp14sd))]
35066pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
35067    transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k))
35068}
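
// Illustrative sketch added in this edit (not upstream code): `rcp14` approximates the
// reciprocal of the lower lane of `b` to within 2^-14 relative error; the helper name is
// invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_rcp14_scalar() {
    let (a, b) = (_mm_set_ss(1.0), _mm_set_ss(4.0));
    // Lower lane is approximately 1 / 4.0 = 0.25; upper lanes come from `a`.
    let r = _mm_cvtss_f32(_mm_rcp14_ss(a, b));
    assert!(r > 0.2499 && r < 0.2501);
    // Write-masked form with the mask bit clear keeps the lane from `src`.
    let src = _mm_set_ss(8.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_rcp14_ss(src, 0b0, a, b)), 8.0);
}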
35069
35070/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35071///
35072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
35073#[inline]
35074#[target_feature(enable = "avx512f")]
35075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35076#[cfg_attr(test, assert_instr(vgetexpss))]
35077pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
35078    transmute(vgetexpss(
35079        a.as_f32x4(),
35080        b.as_f32x4(),
35081        f32x4::ZERO,
35082        0b1,
35083        _MM_FROUND_NO_EXC,
35084    ))
35085}
35086
35087/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35088///
35089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
35090#[inline]
35091#[target_feature(enable = "avx512f")]
35092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35093#[cfg_attr(test, assert_instr(vgetexpss))]
35094pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
35095    transmute(vgetexpss(
35096        a.as_f32x4(),
35097        b.as_f32x4(),
35098        src.as_f32x4(),
35099        k,
35100        _MM_FROUND_NO_EXC,
35101    ))
35102}
35103
35104/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35110#[cfg_attr(test, assert_instr(vgetexpss))]
35111pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
35112    transmute(vgetexpss(
35113        a.as_f32x4(),
35114        b.as_f32x4(),
35115        f32x4::ZERO,
35116        k,
35117        _MM_FROUND_NO_EXC,
35118    ))
35119}
35120
35121/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35122///
35123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
35124#[inline]
35125#[target_feature(enable = "avx512f")]
35126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35127#[cfg_attr(test, assert_instr(vgetexpsd))]
35128pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
35129    transmute(vgetexpsd(
35130        a.as_f64x2(),
35131        b.as_f64x2(),
35132        f64x2::ZERO,
35133        0b1,
35134        _MM_FROUND_NO_EXC,
35135    ))
35136}
35137
35138/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35139///
35140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
35141#[inline]
35142#[target_feature(enable = "avx512f")]
35143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35144#[cfg_attr(test, assert_instr(vgetexpsd))]
35145pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
35146    transmute(vgetexpsd(
35147        a.as_f64x2(),
35148        b.as_f64x2(),
35149        src.as_f64x2(),
35150        k,
35151        _MM_FROUND_NO_EXC,
35152    ))
35153}
35154
35155/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
35156///
35157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
35158#[inline]
35159#[target_feature(enable = "avx512f")]
35160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35161#[cfg_attr(test, assert_instr(vgetexpsd))]
35162pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
35163    transmute(vgetexpsd(
35164        a.as_f64x2(),
35165        b.as_f64x2(),
35166        f64x2::ZERO,
35167        k,
35168        _MM_FROUND_NO_EXC,
35169    ))
35170}
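
// Illustrative sketch added in this edit (not upstream code): `getexp` returns
// floor(log2(|x|)) of the lower lane of `b` as a floating-point value; the helper name is
// invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getexp_scalar() {
    let (a, b) = (_mm_set_ss(1.0), _mm_set_ss(8.0));
    // 8.0 = 1.0 * 2^3, so the extracted exponent is 3.0.
    assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), 3.0);
    // Double-precision variant: 0.5 = 1.0 * 2^-1.
    let (c, d) = (_mm_set_sd(1.0), _mm_set_sd(0.5));
    assert_eq!(_mm_cvtsd_f64(_mm_getexp_sd(c, d)), -1.0);
}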
35171
35172/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35173/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35174///    _MM_MANT_NORM_1_2     // interval [1, 2)\
35175///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
35176///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
35177///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35178/// The sign is determined by sc which can take the following values:\
35179///    _MM_MANT_SIGN_src     // sign = sign(src)\
35180///    _MM_MANT_SIGN_zero    // sign = 0\
35181///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
35182/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
35185#[inline]
35186#[target_feature(enable = "avx512f")]
35187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35188#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
35189#[rustc_legacy_const_generics(2, 3)]
35190pub unsafe fn _mm_getmant_ss<
35191    const NORM: _MM_MANTISSA_NORM_ENUM,
35192    const SIGN: _MM_MANTISSA_SIGN_ENUM,
35193>(
35194    a: __m128,
35195    b: __m128,
35196) -> __m128 {
35197    static_assert_uimm_bits!(NORM, 4);
35198    static_assert_uimm_bits!(SIGN, 2);
35199    let a = a.as_f32x4();
35200    let b = b.as_f32x4();
35201    let r = vgetmantss(
35202        a,
35203        b,
35204        SIGN << 2 | NORM,
35205        f32x4::ZERO,
35206        0b1,
35207        _MM_FROUND_CUR_DIRECTION,
35208    );
35209    transmute(r)
35210}
35211
35212/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35213/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35214///    _MM_MANT_NORM_1_2     // interval [1, 2)\
35215///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
35216///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
35217///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35218/// The sign is determined by sc which can take the following values:\
35219///    _MM_MANT_SIGN_src     // sign = sign(src)\
35220///    _MM_MANT_SIGN_zero    // sign = 0\
35221///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
35222/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35223///
35224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
35225#[inline]
35226#[target_feature(enable = "avx512f")]
35227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35228#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
35229#[rustc_legacy_const_generics(4, 5)]
35230pub unsafe fn _mm_mask_getmant_ss<
35231    const NORM: _MM_MANTISSA_NORM_ENUM,
35232    const SIGN: _MM_MANTISSA_SIGN_ENUM,
35233>(
35234    src: __m128,
35235    k: __mmask8,
35236    a: __m128,
35237    b: __m128,
35238) -> __m128 {
35239    static_assert_uimm_bits!(NORM, 4);
35240    static_assert_uimm_bits!(SIGN, 2);
35241    let a = a.as_f32x4();
35242    let b = b.as_f32x4();
35243    let src = src.as_f32x4();
35244    let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
35245    transmute(r)
35246}
35247
35248/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35249/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35250///    _MM_MANT_NORM_1_2     // interval [1, 2)\
35251///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
35252///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
35253///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35254/// The sign is determined by sc which can take the following values:\
35255///    _MM_MANT_SIGN_src     // sign = sign(src)\
35256///    _MM_MANT_SIGN_zero    // sign = 0\
35257///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
35258/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35259///
35260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
35261#[inline]
35262#[target_feature(enable = "avx512f")]
35263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35264#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
35265#[rustc_legacy_const_generics(3, 4)]
35266pub unsafe fn _mm_maskz_getmant_ss<
35267    const NORM: _MM_MANTISSA_NORM_ENUM,
35268    const SIGN: _MM_MANTISSA_SIGN_ENUM,
35269>(
35270    k: __mmask8,
35271    a: __m128,
35272    b: __m128,
35273) -> __m128 {
35274    static_assert_uimm_bits!(NORM, 4);
35275    static_assert_uimm_bits!(SIGN, 2);
35276    let a = a.as_f32x4();
35277    let b = b.as_f32x4();
35278    let r = vgetmantss(
35279        a,
35280        b,
35281        SIGN << 2 | NORM,
35282        f32x4::ZERO,
35283        k,
35284        _MM_FROUND_CUR_DIRECTION,
35285    );
35286    transmute(r)
35287}
35288
35289/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35290/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35291///    _MM_MANT_NORM_1_2     // interval [1, 2)\
35292///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
35293///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
35294///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35295/// The sign is determined by sc which can take the following values:\
35296///    _MM_MANT_SIGN_src     // sign = sign(src)\
35297///    _MM_MANT_SIGN_zero    // sign = 0\
35298///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
35299/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
35302#[inline]
35303#[target_feature(enable = "avx512f")]
35304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35305#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
35306#[rustc_legacy_const_generics(2, 3)]
35307pub unsafe fn _mm_getmant_sd<
35308    const NORM: _MM_MANTISSA_NORM_ENUM,
35309    const SIGN: _MM_MANTISSA_SIGN_ENUM,
35310>(
35311    a: __m128d,
35312    b: __m128d,
35313) -> __m128d {
35314    static_assert_uimm_bits!(NORM, 4);
35315    static_assert_uimm_bits!(SIGN, 2);
35316    let a = a.as_f64x2();
35317    let b = b.as_f64x2();
35318    let r = vgetmantsd(
35319        a,
35320        b,
35321        SIGN << 2 | NORM,
35322        f64x2::ZERO,
35323        0b1,
35324        _MM_FROUND_CUR_DIRECTION,
35325    );
35326    transmute(r)
35327}
35328
35329/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35330/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35331///    _MM_MANT_NORM_1_2     // interval [1, 2)\
35332///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
35333///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
35334///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35335/// The sign is determined by sc which can take the following values:\
35336///    _MM_MANT_SIGN_src     // sign = sign(src)\
35337///    _MM_MANT_SIGN_zero    // sign = 0\
35338///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
35339/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35340///
35341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
35342#[inline]
35343#[target_feature(enable = "avx512f")]
35344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35345#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
35346#[rustc_legacy_const_generics(4, 5)]
35347pub unsafe fn _mm_mask_getmant_sd<
35348    const NORM: _MM_MANTISSA_NORM_ENUM,
35349    const SIGN: _MM_MANTISSA_SIGN_ENUM,
35350>(
35351    src: __m128d,
35352    k: __mmask8,
35353    a: __m128d,
35354    b: __m128d,
35355) -> __m128d {
35356    static_assert_uimm_bits!(NORM, 4);
35357    static_assert_uimm_bits!(SIGN, 2);
35358    let a = a.as_f64x2();
35359    let b = b.as_f64x2();
35360    let src = src.as_f64x2();
35361    let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
35362    transmute(r)
35363}
35364
35365/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
35366/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
35367///    _MM_MANT_NORM_1_2     // interval [1, 2)\
35368///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
35369///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
35370///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
35371/// The sign is determined by sc which can take the following values:\
35372///    _MM_MANT_SIGN_src     // sign = sign(src)\
35373///    _MM_MANT_SIGN_zero    // sign = 0\
35374///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
35375/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
35376///
35377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
35378#[inline]
35379#[target_feature(enable = "avx512f")]
35380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35381#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
35382#[rustc_legacy_const_generics(3, 4)]
35383pub unsafe fn _mm_maskz_getmant_sd<
35384    const NORM: _MM_MANTISSA_NORM_ENUM,
35385    const SIGN: _MM_MANTISSA_SIGN_ENUM,
35386>(
35387    k: __mmask8,
35388    a: __m128d,
35389    b: __m128d,
35390) -> __m128d {
35391    static_assert_uimm_bits!(NORM, 4);
35392    static_assert_uimm_bits!(SIGN, 2);
35393    let a = a.as_f64x2();
35394    let b = b.as_f64x2();
35395    let r = vgetmantsd(
35396        a,
35397        b,
35398        SIGN << 2 | NORM,
35399        f64x2::ZERO,
35400        k,
35401        _MM_FROUND_CUR_DIRECTION,
35402    );
35403    transmute(r)
35404}
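
// Illustrative sketch added in this edit (not upstream code): `getmant` takes the interval and
// sign selectors as const generics; the constant names below are the ones listed in the doc
// comments above. The helper name is invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getmant_scalar() {
    let (a, b) = (_mm_set_ss(1.0), _mm_set_ss(12.0));
    // 12.0 = 1.5 * 2^3, so the mantissa normalized to [1, 2) is 1.5.
    let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 1.5);
}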
35405
35406/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
35407/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35408/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
35409/// * [`_MM_FROUND_TO_NEG_INF`] : round down
35410/// * [`_MM_FROUND_TO_POS_INF`] : round up
35411/// * [`_MM_FROUND_TO_ZERO`] : truncate
35412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
35413///
35414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
35415#[inline]
35416#[target_feature(enable = "avx512f")]
35417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35418#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
35419#[rustc_legacy_const_generics(2)]
35420pub unsafe fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
35421    static_assert_uimm_bits!(IMM8, 8);
35422    let a = a.as_f32x4();
35423    let b = b.as_f32x4();
35424    let r = vrndscaless(
35425        a,
35426        b,
35427        f32x4::ZERO,
35428        0b11111111,
35429        IMM8,
35430        _MM_FROUND_CUR_DIRECTION,
35431    );
35432    transmute(r)
35433}
35434
35435/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
35436/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35437/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
35438/// * [`_MM_FROUND_TO_NEG_INF`] : round down
35439/// * [`_MM_FROUND_TO_POS_INF`] : round up
35440/// * [`_MM_FROUND_TO_ZERO`] : truncate
35441/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
35442///
35443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
35444#[inline]
35445#[target_feature(enable = "avx512f")]
35446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35447#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
35448#[rustc_legacy_const_generics(4)]
35449pub unsafe fn _mm_mask_roundscale_ss<const IMM8: i32>(
35450    src: __m128,
35451    k: __mmask8,
35452    a: __m128,
35453    b: __m128,
35454) -> __m128 {
35455    static_assert_uimm_bits!(IMM8, 8);
35456    let a = a.as_f32x4();
35457    let b = b.as_f32x4();
35458    let src = src.as_f32x4();
35459    let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
35460    transmute(r)
35461}
35462
35463/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
35464/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35465/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
35466/// * [`_MM_FROUND_TO_NEG_INF`] : round down
35467/// * [`_MM_FROUND_TO_POS_INF`] : round up
35468/// * [`_MM_FROUND_TO_ZERO`] : truncate
35469/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
35470///
35471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
35472#[inline]
35473#[target_feature(enable = "avx512f")]
35474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35475#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
35476#[rustc_legacy_const_generics(3)]
35477pub unsafe fn _mm_maskz_roundscale_ss<const IMM8: i32>(
35478    k: __mmask8,
35479    a: __m128,
35480    b: __m128,
35481) -> __m128 {
35482    static_assert_uimm_bits!(IMM8, 8);
35483    let a = a.as_f32x4();
35484    let b = b.as_f32x4();
35485    let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
35486    transmute(r)
35487}
35488
35489/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
35490/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35491/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
35492/// * [`_MM_FROUND_TO_NEG_INF`] : round down
35493/// * [`_MM_FROUND_TO_POS_INF`] : round up
35494/// * [`_MM_FROUND_TO_ZERO`] : truncate
35495/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
35496///
35497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
35498#[inline]
35499#[target_feature(enable = "avx512f")]
35500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35501#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
35502#[rustc_legacy_const_generics(2)]
35503pub unsafe fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
35504    static_assert_uimm_bits!(IMM8, 8);
35505    let a = a.as_f64x2();
35506    let b = b.as_f64x2();
35507    let r = vrndscalesd(
35508        a,
35509        b,
35510        f64x2::ZERO,
35511        0b11111111,
35512        IMM8,
35513        _MM_FROUND_CUR_DIRECTION,
35514    );
35515    transmute(r)
35516}
35517
35518/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
35519/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35520/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
35521/// * [`_MM_FROUND_TO_NEG_INF`] : round down
35522/// * [`_MM_FROUND_TO_POS_INF`] : round up
35523/// * [`_MM_FROUND_TO_ZERO`] : truncate
35524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
35525///
35526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
35527#[inline]
35528#[target_feature(enable = "avx512f")]
35529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35530#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
35531#[rustc_legacy_const_generics(4)]
35532pub unsafe fn _mm_mask_roundscale_sd<const IMM8: i32>(
35533    src: __m128d,
35534    k: __mmask8,
35535    a: __m128d,
35536    b: __m128d,
35537) -> __m128d {
35538    static_assert_uimm_bits!(IMM8, 8);
35539    let a = a.as_f64x2();
35540    let b = b.as_f64x2();
35541    let src = src.as_f64x2();
35542    let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
35543    transmute(r)
35544}
35545
35546/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
35547/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
35548/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
35549/// * [`_MM_FROUND_TO_NEG_INF`] : round down
35550/// * [`_MM_FROUND_TO_POS_INF`] : round up
35551/// * [`_MM_FROUND_TO_ZERO`] : truncate
35552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
35553///
35554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
35555#[inline]
35556#[target_feature(enable = "avx512f")]
35557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35558#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
35559#[rustc_legacy_const_generics(3)]
35560pub unsafe fn _mm_maskz_roundscale_sd<const IMM8: i32>(
35561    k: __mmask8,
35562    a: __m128d,
35563    b: __m128d,
35564) -> __m128d {
35565    static_assert_uimm_bits!(IMM8, 8);
35566    let a = a.as_f64x2();
35567    let b = b.as_f64x2();
35568    let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
35569    transmute(r)
35570}
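
// Illustrative sketch added in this edit (not upstream code): with IMM8 = 0, `roundscale`
// keeps zero fraction bits and rounds to nearest, i.e. it rounds the lower lane of `b` to an
// integer; the helper name is invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_roundscale_scalar() {
    let (a, b) = (_mm_set_ss(1.0), _mm_set_ss(2.7));
    assert_eq!(_mm_cvtss_f32(_mm_roundscale_ss::<0>(a, b)), 3.0);
    // Zero-masked form with the mask bit clear zeroes the lower lane.
    assert_eq!(_mm_cvtss_f32(_mm_maskz_roundscale_ss::<0>(0b0, a, b)), 0.0);
}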
35571
35572/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
35573///
35574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
35575#[inline]
35576#[target_feature(enable = "avx512f")]
35577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35578#[cfg_attr(test, assert_instr(vscalefss))]
35579pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
35580    let a = a.as_f32x4();
35581    let b = b.as_f32x4();
35582    transmute(vscalefss(
35583        a,
35584        b,
35585        f32x4::ZERO,
35586        0b11111111,
35587        _MM_FROUND_CUR_DIRECTION,
35588    ))
35589}
35590
35591/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35592///
35593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
35594#[inline]
35595#[target_feature(enable = "avx512f")]
35596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35597#[cfg_attr(test, assert_instr(vscalefss))]
35598pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
35599    let a = a.as_f32x4();
35600    let b = b.as_f32x4();
35601    let src = src.as_f32x4();
35602    transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
35603}
35604
35605/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35606///
35607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
35608#[inline]
35609#[target_feature(enable = "avx512f")]
35610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35611#[cfg_attr(test, assert_instr(vscalefss))]
35612pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
35613    transmute(vscalefss(
35614        a.as_f32x4(),
35615        b.as_f32x4(),
35616        f32x4::ZERO,
35617        k,
35618        _MM_FROUND_CUR_DIRECTION,
35619    ))
35620}
35621
35622/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
35623///
35624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
35625#[inline]
35626#[target_feature(enable = "avx512f")]
35627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35628#[cfg_attr(test, assert_instr(vscalefsd))]
35629pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
35630    transmute(vscalefsd(
35631        a.as_f64x2(),
35632        b.as_f64x2(),
35633        f64x2::ZERO,
35634        0b11111111,
35635        _MM_FROUND_CUR_DIRECTION,
35636    ))
35637}
35638
35639/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35640///
35641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
35642#[inline]
35643#[target_feature(enable = "avx512f")]
35644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35645#[cfg_attr(test, assert_instr(vscalefsd))]
35646pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
35647    transmute(vscalefsd(
35648        a.as_f64x2(),
35649        b.as_f64x2(),
35650        src.as_f64x2(),
35651        k,
35652        _MM_FROUND_CUR_DIRECTION,
35653    ))
35654}
35655
35656/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35657///
35658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
35659#[inline]
35660#[target_feature(enable = "avx512f")]
35661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35662#[cfg_attr(test, assert_instr(vscalefsd))]
35663pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
35664    transmute(vscalefsd(
35665        a.as_f64x2(),
35666        b.as_f64x2(),
35667        f64x2::ZERO,
35668        k,
35669        _MM_FROUND_CUR_DIRECTION,
35670    ))
35671}
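
// Illustrative sketch added in this edit (not upstream code): `scalef` computes
// a * 2^floor(b) on the lower lanes; the helper name is invented, and AVX-512F is assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_scalef_scalar() {
    let (a, b) = (_mm_set_ss(3.0), _mm_set_ss(2.0));
    // Lower lane = 3.0 * 2^2 = 12.0.
    assert_eq!(_mm_cvtss_f32(_mm_scalef_ss(a, b)), 12.0);
    // Write-masked double-precision form with the mask bit clear keeps the lane from `src`.
    let src = _mm_set_sd(5.0);
    let (c, d) = (_mm_set_sd(3.0), _mm_set_sd(2.0));
    assert_eq!(_mm_cvtsd_f64(_mm_mask_scalef_sd(src, 0b0, c, d)), 5.0);
}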
35672
35673/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35674///
35675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
35676#[inline]
35677#[target_feature(enable = "avx512f")]
35678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35679#[cfg_attr(test, assert_instr(vfmadd))]
35680pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
35681    let mut fmadd: f32 = simd_extract!(a, 0);
35682    if (k & 0b00000001) != 0 {
35683        let extractb: f32 = simd_extract!(b, 0);
35684        let extractc: f32 = simd_extract!(c, 0);
35685        fmadd = fmaf32(fmadd, extractb, extractc);
35686    }
35687    simd_insert!(a, 0, fmadd)
35688}
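
// A hypothetical usage sketch (names and values are illustrative): mask bit 0 selects
// between the fused result `a[0] * b[0] + c[0]` and the passthrough value from `a`.
// Requires AVX-512F support at run time.
#[cfg(test)]
#[allow(dead_code)]
fn mask_fmadd_ss_usage_sketch() {
    unsafe {
        let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(4.0));
        // Mask bit set: lower lane is 2.0 * 3.0 + 4.0 = 10.0
        assert_eq!(_mm_cvtss_f32(_mm_mask_fmadd_ss(a, 0b1, b, c)), 10.0);
        // Mask bit clear: lower lane is copied from `a`, i.e. 2.0
        assert_eq!(_mm_cvtss_f32(_mm_mask_fmadd_ss(a, 0b0, b, c)), 2.0);
    }
}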
35689
35690/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35691///
35692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
35693#[inline]
35694#[target_feature(enable = "avx512f")]
35695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35696#[cfg_attr(test, assert_instr(vfmadd))]
35697pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
35698    let mut fmadd: f32 = 0.;
35699    if (k & 0b00000001) != 0 {
35700        let extracta: f32 = simd_extract!(a, 0);
35701        let extractb: f32 = simd_extract!(b, 0);
35702        let extractc: f32 = simd_extract!(c, 0);
35703        fmadd = fmaf32(extracta, extractb, extractc);
35704    }
35705    simd_insert!(a, 0, fmadd)
35706}
35707
35708/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
35709///
35710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
35711#[inline]
35712#[target_feature(enable = "avx512f")]
35713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35714#[cfg_attr(test, assert_instr(vfmadd))]
35715pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
35716    let mut fmadd: f32 = simd_extract!(c, 0);
35717    if (k & 0b00000001) != 0 {
35718        let extracta: f32 = simd_extract!(a, 0);
35719        let extractb: f32 = simd_extract!(b, 0);
35720        fmadd = fmaf32(extracta, extractb, fmadd);
35721    }
35722    simd_insert!(c, 0, fmadd)
35723}
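
// A hypothetical sketch contrasting the `mask3` form with the `mask` form above: here
// both the passthrough lane and the upper lanes come from `c`. Illustrative only.
#[cfg(test)]
#[allow(dead_code)]
fn mask3_fmadd_ss_usage_sketch() {
    unsafe {
        let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(4.0));
        // Mask bit set: lower lane is 2.0 * 3.0 + 4.0 = 10.0, upper lanes come from `c`.
        assert_eq!(_mm_cvtss_f32(_mm_mask3_fmadd_ss(a, b, c, 0b1)), 10.0);
        // Mask bit clear: lower lane is copied from `c`, i.e. 4.0.
        assert_eq!(_mm_cvtss_f32(_mm_mask3_fmadd_ss(a, b, c, 0b0)), 4.0);
    }
}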
35724
35725/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35726///
35727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
35728#[inline]
35729#[target_feature(enable = "avx512f")]
35730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35731#[cfg_attr(test, assert_instr(vfmadd))]
35732pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
35733    let mut fmadd: f64 = simd_extract!(a, 0);
35734    if (k & 0b00000001) != 0 {
35735        let extractb: f64 = simd_extract!(b, 0);
35736        let extractc: f64 = simd_extract!(c, 0);
35737        fmadd = fmaf64(fmadd, extractb, extractc);
35738    }
35739    simd_insert!(a, 0, fmadd)
35740}
35741
35742/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35743///
35744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
35745#[inline]
35746#[target_feature(enable = "avx512f")]
35747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35748#[cfg_attr(test, assert_instr(vfmadd))]
35749pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
35750    let mut fmadd: f64 = 0.;
35751    if (k & 0b00000001) != 0 {
35752        let extracta: f64 = simd_extract!(a, 0);
35753        let extractb: f64 = simd_extract!(b, 0);
35754        let extractc: f64 = simd_extract!(c, 0);
35755        fmadd = fmaf64(extracta, extractb, extractc);
35756    }
35757    simd_insert!(a, 0, fmadd)
35758}
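
// A hypothetical sketch of the zeromask form (illustrative values only): when mask
// bit 0 is clear the lower lane becomes 0.0 instead of a passthrough value.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_fmadd_sd_usage_sketch() {
    unsafe {
        let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(4.0));
        // Mask bit set: lower lane is 2.0 * 3.0 + 4.0 = 10.0
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_fmadd_sd(0b1, a, b, c)), 10.0);
        // Mask bit clear: lower lane is zeroed
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_fmadd_sd(0b0, a, b, c)), 0.0);
    }
}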
35759
35760/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
35761///
35762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
35763#[inline]
35764#[target_feature(enable = "avx512f")]
35765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35766#[cfg_attr(test, assert_instr(vfmadd))]
35767pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
35768    let mut fmadd: f64 = simd_extract!(c, 0);
35769    if (k & 0b00000001) != 0 {
35770        let extracta: f64 = simd_extract!(a, 0);
35771        let extractb: f64 = simd_extract!(b, 0);
35772        fmadd = fmaf64(extracta, extractb, fmadd);
35773    }
35774    simd_insert!(c, 0, fmadd)
35775}
35776
35777/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35778///
35779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
35780#[inline]
35781#[target_feature(enable = "avx512f")]
35782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35783#[cfg_attr(test, assert_instr(vfmsub))]
35784pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
35785    let mut fmsub: f32 = simd_extract!(a, 0);
35786    if (k & 0b00000001) != 0 {
35787        let extractb: f32 = simd_extract!(b, 0);
35788        let extractc: f32 = simd_extract!(c, 0);
35789        let extractc = -extractc;
35790        fmsub = fmaf32(fmsub, extractb, extractc);
35791    }
35792    simd_insert!(a, 0, fmsub)
35793}
35794
35795/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35796///
35797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
35798#[inline]
35799#[target_feature(enable = "avx512f")]
35800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35801#[cfg_attr(test, assert_instr(vfmsub))]
35802pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
35803    let mut fmsub: f32 = 0.;
35804    if (k & 0b00000001) != 0 {
35805        let extracta: f32 = simd_extract!(a, 0);
35806        let extractb: f32 = simd_extract!(b, 0);
35807        let extractc: f32 = simd_extract!(c, 0);
35808        let extractc = -extractc;
35809        fmsub = fmaf32(extracta, extractb, extractc);
35810    }
35811    simd_insert!(a, 0, fmsub)
35812}
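
// A hypothetical sketch (illustrative values): `fmsub` computes `a[0] * b[0] - c[0]`
// in the lower lane; with the zeromask form a cleared mask bit yields 0.0.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_fmsub_ss_usage_sketch() {
    unsafe {
        let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(4.0));
        // Mask bit set: lower lane is 2.0 * 3.0 - 4.0 = 2.0
        assert_eq!(_mm_cvtss_f32(_mm_maskz_fmsub_ss(0b1, a, b, c)), 2.0);
        // Mask bit clear: lower lane is zeroed
        assert_eq!(_mm_cvtss_f32(_mm_maskz_fmsub_ss(0b0, a, b, c)), 0.0);
    }
}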
35813
35814/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
35815///
35816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
35817#[inline]
35818#[target_feature(enable = "avx512f")]
35819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35820#[cfg_attr(test, assert_instr(vfmsub))]
35821pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
35822    let mut fmsub: f32 = simd_extract!(c, 0);
35823    if (k & 0b00000001) != 0 {
35824        let extracta: f32 = simd_extract!(a, 0);
35825        let extractb: f32 = simd_extract!(b, 0);
35826        let extractc = -fmsub;
35827        fmsub = fmaf32(extracta, extractb, extractc);
35828    }
35829    simd_insert!(c, 0, fmsub)
35830}
35831
35832/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35833///
35834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
35835#[inline]
35836#[target_feature(enable = "avx512f")]
35837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35838#[cfg_attr(test, assert_instr(vfmsub))]
35839pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
35840    let mut fmsub: f64 = simd_extract!(a, 0);
35841    if (k & 0b00000001) != 0 {
35842        let extractb: f64 = simd_extract!(b, 0);
35843        let extractc: f64 = simd_extract!(c, 0);
35844        let extractc = -extractc;
35845        fmsub = fmaf64(fmsub, extractb, extractc);
35846    }
35847    simd_insert!(a, 0, fmsub)
35848}
35849
35850/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35851///
35852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
35853#[inline]
35854#[target_feature(enable = "avx512f")]
35855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35856#[cfg_attr(test, assert_instr(vfmsub))]
35857pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
35858    let mut fmsub: f64 = 0.;
35859    if (k & 0b00000001) != 0 {
35860        let extracta: f64 = simd_extract!(a, 0);
35861        let extractb: f64 = simd_extract!(b, 0);
35862        let extractc: f64 = simd_extract!(c, 0);
35863        let extractc = -extractc;
35864        fmsub = fmaf64(extracta, extractb, extractc);
35865    }
35866    simd_insert!(a, 0, fmsub)
35867}
35868
35869/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
35870///
35871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
35872#[inline]
35873#[target_feature(enable = "avx512f")]
35874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35875#[cfg_attr(test, assert_instr(vfmsub))]
35876pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
35877    let mut fmsub: f64 = simd_extract!(c, 0);
35878    if (k & 0b00000001) != 0 {
35879        let extracta: f64 = simd_extract!(a, 0);
35880        let extractb: f64 = simd_extract!(b, 0);
35881        let extractc = -fmsub;
35882        fmsub = fmaf64(extracta, extractb, extractc);
35883    }
35884    simd_insert!(c, 0, fmsub)
35885}
35886
35887/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35888///
35889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
35890#[inline]
35891#[target_feature(enable = "avx512f")]
35892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35893#[cfg_attr(test, assert_instr(vfnmadd))]
35894pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
35895    let mut fnmadd: f32 = simd_extract!(a, 0);
35896    if (k & 0b00000001) != 0 {
35897        let extracta = -fnmadd;
35898        let extractb: f32 = simd_extract!(b, 0);
35899        let extractc: f32 = simd_extract!(c, 0);
35900        fnmadd = fmaf32(extracta, extractb, extractc);
35901    }
35902    simd_insert!(a, 0, fnmadd)
35903}
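
// A hypothetical sketch (illustrative values): `fnmadd` computes `-(a[0] * b[0]) + c[0]`
// in the lower lane, with `a` as the passthrough source when mask bit 0 is clear.
#[cfg(test)]
#[allow(dead_code)]
fn mask_fnmadd_ss_usage_sketch() {
    unsafe {
        let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(10.0));
        // Mask bit set: lower lane is -(2.0 * 3.0) + 10.0 = 4.0
        assert_eq!(_mm_cvtss_f32(_mm_mask_fnmadd_ss(a, 0b1, b, c)), 4.0);
        // Mask bit clear: lower lane is copied from `a`, i.e. 2.0
        assert_eq!(_mm_cvtss_f32(_mm_mask_fnmadd_ss(a, 0b0, b, c)), 2.0);
    }
}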
35904
35905/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35906///
35907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
35908#[inline]
35909#[target_feature(enable = "avx512f")]
35910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35911#[cfg_attr(test, assert_instr(vfnmadd))]
35912pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
35913    let mut fnmadd: f32 = 0.;
35914    if (k & 0b00000001) != 0 {
35915        let extracta: f32 = simd_extract!(a, 0);
35916        let extracta = -extracta;
35917        let extractb: f32 = simd_extract!(b, 0);
35918        let extractc: f32 = simd_extract!(c, 0);
35919        fnmadd = fmaf32(extracta, extractb, extractc);
35920    }
35921    simd_insert!(a, 0, fnmadd)
35922}
35923
35924/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
35925///
35926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
35927#[inline]
35928#[target_feature(enable = "avx512f")]
35929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35930#[cfg_attr(test, assert_instr(vfnmadd))]
35931pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
35932    let mut fnmadd: f32 = simd_extract!(c, 0);
35933    if (k & 0b00000001) != 0 {
35934        let extracta: f32 = simd_extract!(a, 0);
35935        let extracta = -extracta;
35936        let extractb: f32 = simd_extract!(b, 0);
35937        fnmadd = fmaf32(extracta, extractb, fnmadd);
35938    }
35939    simd_insert!(c, 0, fnmadd)
35940}
35941
35942/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35943///
35944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
35945#[inline]
35946#[target_feature(enable = "avx512f")]
35947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35948#[cfg_attr(test, assert_instr(vfnmadd))]
35949pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
35950    let mut fnmadd: f64 = simd_extract!(a, 0);
35951    if (k & 0b00000001) != 0 {
35952        let extracta = -fnmadd;
35953        let extractb: f64 = simd_extract!(b, 0);
35954        let extractc: f64 = simd_extract!(c, 0);
35955        fnmadd = fmaf64(extracta, extractb, extractc);
35956    }
35957    simd_insert!(a, 0, fnmadd)
35958}
35959
35960/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
35961///
35962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
35963#[inline]
35964#[target_feature(enable = "avx512f")]
35965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35966#[cfg_attr(test, assert_instr(vfnmadd))]
35967pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
35968    let mut fnmadd: f64 = 0.;
35969    if (k & 0b00000001) != 0 {
35970        let extracta: f64 = simd_extract!(a, 0);
35971        let extracta = -extracta;
35972        let extractb: f64 = simd_extract!(b, 0);
35973        let extractc: f64 = simd_extract!(c, 0);
35974        fnmadd = fmaf64(extracta, extractb, extractc);
35975    }
35976    simd_insert!(a, 0, fnmadd)
35977}
35978
35979/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
35980///
35981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
35982#[inline]
35983#[target_feature(enable = "avx512f")]
35984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35985#[cfg_attr(test, assert_instr(vfnmadd))]
35986pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
35987    let mut fnmadd: f64 = simd_extract!(c, 0);
35988    if (k & 0b00000001) != 0 {
35989        let extracta: f64 = simd_extract!(a, 0);
35990        let extracta = -extracta;
35991        let extractb: f64 = simd_extract!(b, 0);
35992        fnmadd = fmaf64(extracta, extractb, fnmadd);
35993    }
35994    simd_insert!(c, 0, fnmadd)
35995}
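
// A hypothetical sketch (illustrative values): the `mask3` double-precision form
// writes `-(a[0] * b[0]) + c[0]` into the register holding `c`, falling back to
// `c[0]` when mask bit 0 is clear.
#[cfg(test)]
#[allow(dead_code)]
fn mask3_fnmadd_sd_usage_sketch() {
    unsafe {
        let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(10.0));
        // Mask bit set: lower lane is -(2.0 * 3.0) + 10.0 = 4.0
        assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmadd_sd(a, b, c, 0b1)), 4.0);
        // Mask bit clear: lower lane is copied from `c`, i.e. 10.0
        assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmadd_sd(a, b, c, 0b0)), 10.0);
    }
}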
35996
35997/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
35998///
35999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
36000#[inline]
36001#[target_feature(enable = "avx512f")]
36002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36003#[cfg_attr(test, assert_instr(vfnmsub))]
36004pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
36005    let mut fnmsub: f32 = simd_extract!(a, 0);
36006    if (k & 0b00000001) != 0 {
36007        let extracta = -fnmsub;
36008        let extractb: f32 = simd_extract!(b, 0);
36009        let extractc: f32 = simd_extract!(c, 0);
36010        let extractc = -extractc;
36011        fnmsub = fmaf32(extracta, extractb, extractc);
36012    }
36013    simd_insert!(a, 0, fnmsub)
36014}
36015
36016/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36017///
36018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
36019#[inline]
36020#[target_feature(enable = "avx512f")]
36021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36022#[cfg_attr(test, assert_instr(vfnmsub))]
36023pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
36024    let mut fnmsub: f32 = 0.;
36025    if (k & 0b00000001) != 0 {
36026        let extracta: f32 = simd_extract!(a, 0);
36027        let extracta = -extracta;
36028        let extractb: f32 = simd_extract!(b, 0);
36029        let extractc: f32 = simd_extract!(c, 0);
36030        let extractc = -extractc;
36031        fnmsub = fmaf32(extracta, extractb, extractc);
36032    }
36033    simd_insert!(a, 0, fnmsub)
36034}
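
// A hypothetical sketch (illustrative values): `fnmsub` computes `-(a[0] * b[0]) - c[0]`
// in the lower lane; the zeromask form yields 0.0 when mask bit 0 is clear.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_fnmsub_ss_usage_sketch() {
    unsafe {
        let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(4.0));
        // Mask bit set: lower lane is -(2.0 * 3.0) - 4.0 = -10.0
        assert_eq!(_mm_cvtss_f32(_mm_maskz_fnmsub_ss(0b1, a, b, c)), -10.0);
        // Mask bit clear: lower lane is zeroed
        assert_eq!(_mm_cvtss_f32(_mm_maskz_fnmsub_ss(0b0, a, b, c)), 0.0);
    }
}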
36035
36036/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
36037///
36038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
36039#[inline]
36040#[target_feature(enable = "avx512f")]
36041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36042#[cfg_attr(test, assert_instr(vfnmsub))]
36043pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
36044    let mut fnmsub: f32 = simd_extract!(c, 0);
36045    if (k & 0b00000001) != 0 {
36046        let extracta: f32 = simd_extract!(a, 0);
36047        let extracta = -extracta;
36048        let extractb: f32 = simd_extract!(b, 0);
36049        let extractc = -fnmsub;
36050        fnmsub = fmaf32(extracta, extractb, extractc);
36051    }
36052    simd_insert!(c, 0, fnmsub)
36053}
36054
36055/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36056///
36057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
36058#[inline]
36059#[target_feature(enable = "avx512f")]
36060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36061#[cfg_attr(test, assert_instr(vfnmsub))]
36062pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
36063    let mut fnmsub: f64 = simd_extract!(a, 0);
36064    if (k & 0b00000001) != 0 {
36065        let extracta = -fnmsub;
36066        let extractb: f64 = simd_extract!(b, 0);
36067        let extractc: f64 = simd_extract!(c, 0);
36068        let extractc = -extractc;
36069        fnmsub = fmaf64(extracta, extractb, extractc);
36070    }
36071    simd_insert!(a, 0, fnmsub)
36072}
36073
36074/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36075///
36076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
36077#[inline]
36078#[target_feature(enable = "avx512f")]
36079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36080#[cfg_attr(test, assert_instr(vfnmsub))]
36081pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
36082    let mut fnmsub: f64 = 0.;
36083    if (k & 0b00000001) != 0 {
36084        let extracta: f64 = simd_extract!(a, 0);
36085        let extracta = -extracta;
36086        let extractb: f64 = simd_extract!(b, 0);
36087        let extractc: f64 = simd_extract!(c, 0);
36088        let extractc = -extractc;
36089        fnmsub = fmaf64(extracta, extractb, extractc);
36090    }
36091    simd_insert!(a, 0, fnmsub)
36092}
36093
36094/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
36095///
36096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
36097#[inline]
36098#[target_feature(enable = "avx512f")]
36099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36100#[cfg_attr(test, assert_instr(vfnmsub))]
36101pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
36102    let mut fnmsub: f64 = simd_extract!(c, 0);
36103    if (k & 0b00000001) != 0 {
36104        let extracta: f64 = simd_extract!(a, 0);
36105        let extracta = -extracta;
36106        let extractb: f64 = simd_extract!(b, 0);
36107        let extractc = -fnmsub;
36108        fnmsub = fmaf64(extracta, extractb, extractc);
36109    }
36110    simd_insert!(c, 0, fnmsub)
36111}
36112
36113/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36114///
36115/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36116/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36117/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36118/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36119/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36120/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36121///
36122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
36123#[inline]
36124#[target_feature(enable = "avx512f")]
36125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36126#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
36127#[rustc_legacy_const_generics(2)]
36128pub unsafe fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36129    static_assert_rounding!(ROUNDING);
36130    let a = a.as_f32x4();
36131    let b = b.as_f32x4();
36132    let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
36133    transmute(r)
36134}
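
// A hypothetical sketch of how the `ROUNDING` const parameter is supplied (values are
// illustrative): adding a value far below one ULP of 1.0 rounds back to 1.0 under
// round-to-nearest but bumps to the next representable float under round-up.
#[cfg(test)]
#[allow(dead_code)]
fn add_round_ss_usage_sketch() {
    unsafe {
        let a = _mm_set_ss(1.0);
        let b = _mm_set_ss(1.0e-10);
        // Round to nearest: the lower lane stays 1.0
        let near = _mm_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        // Round toward +inf: the lower lane becomes the next float above 1.0
        let up = _mm_add_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
        assert!(_mm_cvtss_f32(up) > _mm_cvtss_f32(near));
    }
}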
36135
36136/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36137///
36138/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36139/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36140/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36141/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36142/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36143/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36144///
36145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
36146#[inline]
36147#[target_feature(enable = "avx512f")]
36148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36149#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
36150#[rustc_legacy_const_generics(4)]
36151pub unsafe fn _mm_mask_add_round_ss<const ROUNDING: i32>(
36152    src: __m128,
36153    k: __mmask8,
36154    a: __m128,
36155    b: __m128,
36156) -> __m128 {
36157    static_assert_rounding!(ROUNDING);
36158    let a = a.as_f32x4();
36159    let b = b.as_f32x4();
36160    let src = src.as_f32x4();
36161    let r = vaddss(a, b, src, k, ROUNDING);
36162    transmute(r)
36163}
36164
36165/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36166///
36167/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36168/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36169/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36170/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36171/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36172/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36173///
36174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
36175#[inline]
36176#[target_feature(enable = "avx512f")]
36177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36178#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
36179#[rustc_legacy_const_generics(3)]
36180pub unsafe fn _mm_maskz_add_round_ss<const ROUNDING: i32>(
36181    k: __mmask8,
36182    a: __m128,
36183    b: __m128,
36184) -> __m128 {
36185    static_assert_rounding!(ROUNDING);
36186    let a = a.as_f32x4();
36187    let b = b.as_f32x4();
36188    let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
36189    transmute(r)
36190}
36191
36192/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36193///
36194/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36195/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36196/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36197/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36198/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36199/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36200///
36201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
36202#[inline]
36203#[target_feature(enable = "avx512f")]
36204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36205#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
36206#[rustc_legacy_const_generics(2)]
36207pub unsafe fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36208    static_assert_rounding!(ROUNDING);
36209    let a = a.as_f64x2();
36210    let b = b.as_f64x2();
36211    let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
36212    transmute(r)
36213}
36214
36215/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36216///
36217/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36218/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36219/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36220/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36221/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36222/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36223///
36224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
36225#[inline]
36226#[target_feature(enable = "avx512f")]
36227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36228#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
36229#[rustc_legacy_const_generics(4)]
36230pub unsafe fn _mm_mask_add_round_sd<const ROUNDING: i32>(
36231    src: __m128d,
36232    k: __mmask8,
36233    a: __m128d,
36234    b: __m128d,
36235) -> __m128d {
36236    static_assert_rounding!(ROUNDING);
36237    let a = a.as_f64x2();
36238    let b = b.as_f64x2();
36239    let src = src.as_f64x2();
36240    let r = vaddsd(a, b, src, k, ROUNDING);
36241    transmute(r)
36242}
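
// A hypothetical sketch combining the writemask with an explicit rounding mode
// (illustrative values): when mask bit 0 is clear the lower lane comes from `src`
// and the rounding mode has no visible effect.
#[cfg(test)]
#[allow(dead_code)]
fn mask_add_round_sd_usage_sketch() {
    unsafe {
        let src = _mm_set_sd(42.0);
        let (a, b) = (_mm_set_sd(1.5), _mm_set_sd(2.25));
        const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        // Mask bit set: lower lane is 1.5 + 2.25 = 3.75
        assert_eq!(_mm_cvtsd_f64(_mm_mask_add_round_sd::<R>(src, 0b1, a, b)), 3.75);
        // Mask bit clear: lower lane is copied from `src`
        assert_eq!(_mm_cvtsd_f64(_mm_mask_add_round_sd::<R>(src, 0b0, a, b)), 42.0);
    }
}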
36243
36244/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36245///
36246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36252///
36253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
36254#[inline]
36255#[target_feature(enable = "avx512f")]
36256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36257#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
36258#[rustc_legacy_const_generics(3)]
36259pub unsafe fn _mm_maskz_add_round_sd<const ROUNDING: i32>(
36260    k: __mmask8,
36261    a: __m128d,
36262    b: __m128d,
36263) -> __m128d {
36264    static_assert_rounding!(ROUNDING);
36265    let a = a.as_f64x2();
36266    let b = b.as_f64x2();
36267    let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
36268    transmute(r)
36269}
36270
36271/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36272///
36273/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36274/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36275/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36276/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36277/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36278/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36279///
36280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
36281#[inline]
36282#[target_feature(enable = "avx512f")]
36283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36284#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
36285#[rustc_legacy_const_generics(2)]
36286pub unsafe fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36287    static_assert_rounding!(ROUNDING);
36288    let a = a.as_f32x4();
36289    let b = b.as_f32x4();
36290    let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
36291    transmute(r)
36292}
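
// A hypothetical sketch of directed rounding on subtraction (illustrative values):
// subtracting a tiny value from 1.0 stays at 1.0 under round-to-nearest but drops to
// the next float below 1.0 under round-toward-negative-infinity.
#[cfg(test)]
#[allow(dead_code)]
fn sub_round_ss_usage_sketch() {
    unsafe {
        let a = _mm_set_ss(1.0);
        let b = _mm_set_ss(1.0e-10);
        let near = _mm_sub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let down = _mm_sub_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
        assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(near));
    }
}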
36293
36294/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36295///
36296/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36297/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36298/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36299/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36300/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36301/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36302///
36303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
36304#[inline]
36305#[target_feature(enable = "avx512f")]
36306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36307#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
36308#[rustc_legacy_const_generics(4)]
36309pub unsafe fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
36310    src: __m128,
36311    k: __mmask8,
36312    a: __m128,
36313    b: __m128,
36314) -> __m128 {
36315    static_assert_rounding!(ROUNDING);
36316    let a = a.as_f32x4();
36317    let b = b.as_f32x4();
36318    let src = src.as_f32x4();
36319    let r = vsubss(a, b, src, k, ROUNDING);
36320    transmute(r)
36321}
36322
36323/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36324///
36325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36331///
36332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
36333#[inline]
36334#[target_feature(enable = "avx512f")]
36335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36336#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
36337#[rustc_legacy_const_generics(3)]
36338pub unsafe fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(
36339    k: __mmask8,
36340    a: __m128,
36341    b: __m128,
36342) -> __m128 {
36343    static_assert_rounding!(ROUNDING);
36344    let a = a.as_f32x4();
36345    let b = b.as_f32x4();
36346    let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
36347    transmute(r)
36348}
36349
36350/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36351///
36352/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36353/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36354/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36355/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36356/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36357/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36358///
36359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
36360#[inline]
36361#[target_feature(enable = "avx512f")]
36362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36363#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
36364#[rustc_legacy_const_generics(2)]
36365pub unsafe fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36366    static_assert_rounding!(ROUNDING);
36367    let a = a.as_f64x2();
36368    let b = b.as_f64x2();
36369    let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
36370    transmute(r)
36371}
36372
36373/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36374///
36375/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36376/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36377/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36378/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36379/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36380/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36386#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
36387#[rustc_legacy_const_generics(4)]
36388pub unsafe fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
36389    src: __m128d,
36390    k: __mmask8,
36391    a: __m128d,
36392    b: __m128d,
36393) -> __m128d {
36394    static_assert_rounding!(ROUNDING);
36395    let a = a.as_f64x2();
36396    let b = b.as_f64x2();
36397    let src = src.as_f64x2();
36398    let r = vsubsd(a, b, src, k, ROUNDING);
36399    transmute(r)
36400}
36401
36402/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36403///
36404/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36405/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36406/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36407/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36408/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36409/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36410///
36411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
36412#[inline]
36413#[target_feature(enable = "avx512f")]
36414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36415#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
36416#[rustc_legacy_const_generics(3)]
36417pub unsafe fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(
36418    k: __mmask8,
36419    a: __m128d,
36420    b: __m128d,
36421) -> __m128d {
36422    static_assert_rounding!(ROUNDING);
36423    let a = a.as_f64x2();
36424    let b = b.as_f64x2();
36425    let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
36426    transmute(r)
36427}
36428
36429/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36430///
36431/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36432/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36433/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36434/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36435/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36436/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36437///
36438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
36439#[inline]
36440#[target_feature(enable = "avx512f")]
36441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36442#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
36443#[rustc_legacy_const_generics(2)]
36444pub unsafe fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36445    static_assert_rounding!(ROUNDING);
36446    let a = a.as_f32x4();
36447    let b = b.as_f32x4();
36448    let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
36449    transmute(r)
36450}
36451
36452/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36453///
36454/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36455/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36456/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36457/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36458/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36459/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36460///
36461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
36462#[inline]
36463#[target_feature(enable = "avx512f")]
36464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36465#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
36466#[rustc_legacy_const_generics(4)]
36467pub unsafe fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
36468    src: __m128,
36469    k: __mmask8,
36470    a: __m128,
36471    b: __m128,
36472) -> __m128 {
36473    static_assert_rounding!(ROUNDING);
36474    let a = a.as_f32x4();
36475    let b = b.as_f32x4();
36476    let src = src.as_f32x4();
36477    let r = vmulss(a, b, src, k, ROUNDING);
36478    transmute(r)
36479}
36480
36481/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36482///
36483/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36484/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36485/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36486/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36487/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36488/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36489///
36490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
36491#[inline]
36492#[target_feature(enable = "avx512f")]
36493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36494#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
36495#[rustc_legacy_const_generics(3)]
36496pub unsafe fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(
36497    k: __mmask8,
36498    a: __m128,
36499    b: __m128,
36500) -> __m128 {
36501    static_assert_rounding!(ROUNDING);
36502    let a = a.as_f32x4();
36503    let b = b.as_f32x4();
36504    let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
36505    transmute(r)
36506}
36507
36508/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36509///
36510/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36511/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36512/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36513/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36514/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36515/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36516///
36517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
36518#[inline]
36519#[target_feature(enable = "avx512f")]
36520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36521#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
36522#[rustc_legacy_const_generics(2)]
36523pub unsafe fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36524    static_assert_rounding!(ROUNDING);
36525    let a = a.as_f64x2();
36526    let b = b.as_f64x2();
36527    let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
36528    transmute(r)
36529}
36530
36531/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36532///
36533/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36534/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36535/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36536/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36537/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36538/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36539///
36540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
36541#[inline]
36542#[target_feature(enable = "avx512f")]
36543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36544#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
36545#[rustc_legacy_const_generics(4)]
36546pub unsafe fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
36547    src: __m128d,
36548    k: __mmask8,
36549    a: __m128d,
36550    b: __m128d,
36551) -> __m128d {
36552    static_assert_rounding!(ROUNDING);
36553    let a = a.as_f64x2();
36554    let b = b.as_f64x2();
36555    let src = src.as_f64x2();
36556    let r = vmulsd(a, b, src, k, ROUNDING);
36557    transmute(r)
36558}
36559
36560/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36561///
36562/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36563/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36564/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36565/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36566/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36567/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36568///
36569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
36570#[inline]
36571#[target_feature(enable = "avx512f")]
36572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36573#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
36574#[rustc_legacy_const_generics(3)]
36575pub unsafe fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(
36576    k: __mmask8,
36577    a: __m128d,
36578    b: __m128d,
36579) -> __m128d {
36580    static_assert_rounding!(ROUNDING);
36581    let a = a.as_f64x2();
36582    let b = b.as_f64x2();
36583    let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
36584    transmute(r)
36585}
36586
36587/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36588///
36589/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36590/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36591/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36592/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36593/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36594/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36595///
36596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
36597#[inline]
36598#[target_feature(enable = "avx512f")]
36599#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36600#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
36601#[rustc_legacy_const_generics(2)]
36602pub unsafe fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36603    static_assert_rounding!(ROUNDING);
36604    let a = a.as_f32x4();
36605    let b = b.as_f32x4();
36606    let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
36607    transmute(r)
36608}
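
// A hypothetical sketch (illustrative values): 1.0 / 3.0 is inexact, so rounding up
// and rounding down produce results one ULP apart in the lower lane.
#[cfg(test)]
#[allow(dead_code)]
fn div_round_ss_usage_sketch() {
    unsafe {
        let a = _mm_set_ss(1.0);
        let b = _mm_set_ss(3.0);
        let up = _mm_div_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
        let down = _mm_div_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
        assert!(_mm_cvtss_f32(up) > _mm_cvtss_f32(down));
    }
}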
36609
36610/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36611///
36612/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36613/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36614/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36615/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36616/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36617/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36618///
36619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
36620#[inline]
36621#[target_feature(enable = "avx512f")]
36622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36623#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
36624#[rustc_legacy_const_generics(4)]
36625pub unsafe fn _mm_mask_div_round_ss<const ROUNDING: i32>(
36626    src: __m128,
36627    k: __mmask8,
36628    a: __m128,
36629    b: __m128,
36630) -> __m128 {
36631    static_assert_rounding!(ROUNDING);
36632    let a = a.as_f32x4();
36633    let b = b.as_f32x4();
36634    let src = src.as_f32x4();
36635    let r = vdivss(a, b, src, k, ROUNDING);
36636    transmute(r)
36637}
36638
36639/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36640///
36641/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36642/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36643/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36644/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36645/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36646/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36647///
36648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
36649#[inline]
36650#[target_feature(enable = "avx512f")]
36651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36652#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
36653#[rustc_legacy_const_generics(3)]
36654pub unsafe fn _mm_maskz_div_round_ss<const ROUNDING: i32>(
36655    k: __mmask8,
36656    a: __m128,
36657    b: __m128,
36658) -> __m128 {
36659    static_assert_rounding!(ROUNDING);
36660    let a = a.as_f32x4();
36661    let b = b.as_f32x4();
36662    let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
36663    transmute(r)
36664}
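
// Hedged sketch of the scalar-division family above (the helper name is hypothetical).
// The maskz variant zeroes the low lane when mask bit 0 is clear; the upper three
// lanes always come from `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_div_round_ss() -> (f32, f32) {
    let a = _mm_set_ps(5.0, 4.0, 3.0, 9.0); // low lane = 9.0
    let b = _mm_set_ps(1.0, 1.0, 1.0, 2.0); // low lane = 2.0
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let kept = _mm_maskz_div_round_ss::<R>(0b1, a, b); // low lane = 9.0 / 2.0 = 4.5
    let zeroed = _mm_maskz_div_round_ss::<R>(0b0, a, b); // low lane = 0.0
    (_mm_cvtss_f32(kept), _mm_cvtss_f32(zeroed))
}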
36665
36666/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36667///
36668/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36669/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36670/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36671/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36672/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36673/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36674///
36675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
36676#[inline]
36677#[target_feature(enable = "avx512f")]
36678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36679#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
36680#[rustc_legacy_const_generics(2)]
36681pub unsafe fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
36682    static_assert_rounding!(ROUNDING);
36683    let a = a.as_f64x2();
36684    let b = b.as_f64x2();
36685    let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
36686    transmute(r)
36687}
36688
36689/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36690///
36691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36697///
36698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
36699#[inline]
36700#[target_feature(enable = "avx512f")]
36701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36702#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
36703#[rustc_legacy_const_generics(4)]
36704pub unsafe fn _mm_mask_div_round_sd<const ROUNDING: i32>(
36705    src: __m128d,
36706    k: __mmask8,
36707    a: __m128d,
36708    b: __m128d,
36709) -> __m128d {
36710    static_assert_rounding!(ROUNDING);
36711    let a = a.as_f64x2();
36712    let b = b.as_f64x2();
36713    let src = src.as_f64x2();
36714    let r = vdivsd(a, b, src, k, ROUNDING);
36715    transmute(r)
36716}
36717
36718/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36719///
36720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36726///
36727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
36728#[inline]
36729#[target_feature(enable = "avx512f")]
36730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36731#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
36732#[rustc_legacy_const_generics(3)]
36733pub unsafe fn _mm_maskz_div_round_sd<const ROUNDING: i32>(
36734    k: __mmask8,
36735    a: __m128d,
36736    b: __m128d,
36737) -> __m128d {
36738    static_assert_rounding!(ROUNDING);
36739    let a = a.as_f64x2();
36740    let b = b.as_f64x2();
36741    let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
36742    transmute(r)
36743}
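
// Sketch only (assumed helper name): the write-masked form keeps `src`'s low lane
// when mask bit 0 is clear, instead of zeroing it like the maskz form.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_div_round_sd() -> f64 {
    let src = _mm_set_sd(-1.0);
    let a = _mm_set_sd(10.0);
    let b = _mm_set_sd(4.0);
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // Mask bit 0 is clear, so the low lane of the result is src[0] = -1.0,
    // not 10.0 / 4.0; the upper lane is still copied from `a`.
    _mm_cvtsd_f64(_mm_mask_div_round_sd::<R>(src, 0b0, a, b))
}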
36744
36745/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36746/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36747///
36748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
36749#[inline]
36750#[target_feature(enable = "avx512f")]
36751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36752#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
36753#[rustc_legacy_const_generics(2)]
36754pub unsafe fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
36755    static_assert_sae!(SAE);
36756    let a = a.as_f32x4();
36757    let b = b.as_f32x4();
36758    let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
36759    transmute(r)
36760}
36761
36762/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36763/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36764///
36765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
36766#[inline]
36767#[target_feature(enable = "avx512f")]
36768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36769#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
36770#[rustc_legacy_const_generics(4)]
36771pub unsafe fn _mm_mask_max_round_ss<const SAE: i32>(
36772    src: __m128,
36773    k: __mmask8,
36774    a: __m128,
36775    b: __m128,
36776) -> __m128 {
36777    static_assert_sae!(SAE);
36778    let a = a.as_f32x4();
36779    let b = b.as_f32x4();
36780    let src = src.as_f32x4();
36781    let r = vmaxss(a, b, src, k, SAE);
36782    transmute(r)
36783}
36784
36785/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36786/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36787///
36788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
36789#[inline]
36790#[target_feature(enable = "avx512f")]
36791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36792#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
36793#[rustc_legacy_const_generics(3)]
36794pub unsafe fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36795    static_assert_sae!(SAE);
36796    let a = a.as_f32x4();
36797    let b = b.as_f32x4();
36798    let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
36799    transmute(r)
36800}
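
// Minimal sketch (illustrative name): `SAE` here is not a rounding mode; passing
// _MM_FROUND_NO_EXC suppresses floating-point exceptions, while
// _MM_FROUND_CUR_DIRECTION leaves MXCSR reporting unchanged.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_max_round_ss() -> f32 {
    let a = _mm_set_ss(2.5); // low lane = 2.5, upper lanes = 0.0
    let b = _mm_set_ss(7.25);
    _mm_cvtss_f32(_mm_max_round_ss::<_MM_FROUND_NO_EXC>(a, b)) // 7.25
}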
36801
36802/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36803/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36804///
36805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
36806#[inline]
36807#[target_feature(enable = "avx512f")]
36808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36809#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
36810#[rustc_legacy_const_generics(2)]
36811pub unsafe fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
36812    static_assert_sae!(SAE);
36813    let a = a.as_f64x2();
36814    let b = b.as_f64x2();
36815    let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
36816    transmute(r)
36817}
36818
36819/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36820/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36821///
36822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
36823#[inline]
36824#[target_feature(enable = "avx512f")]
36825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36826#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
36827#[rustc_legacy_const_generics(4)]
36828pub unsafe fn _mm_mask_max_round_sd<const SAE: i32>(
36829    src: __m128d,
36830    k: __mmask8,
36831    a: __m128d,
36832    b: __m128d,
36833) -> __m128d {
36834    static_assert_sae!(SAE);
36835    let a = a.as_f64x2();
36836    let b = b.as_f64x2();
36837    let src = src.as_f64x2();
36838    let r = vmaxsd(a, b, src, k, SAE);
36839    transmute(r)
36840}
36841
36842/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36843/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36844///
36845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
36846#[inline]
36847#[target_feature(enable = "avx512f")]
36848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36849#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
36850#[rustc_legacy_const_generics(3)]
36851pub unsafe fn _mm_maskz_max_round_sd<const SAE: i32>(
36852    k: __mmask8,
36853    a: __m128d,
36854    b: __m128d,
36855) -> __m128d {
36856    static_assert_sae!(SAE);
36857    let a = a.as_f64x2();
36858    let b = b.as_f64x2();
36859    let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
36860    transmute(r)
36861}
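
// Illustrative only: the write-masked double-precision variant, with the mask bit
// set so the comparison result is actually stored (the helper name is invented).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_max_round_sd() -> f64 {
    let src = _mm_set_sd(0.0);
    let a = _mm_set_sd(-3.0);
    let b = _mm_set_sd(-1.0);
    // Low lane = max(-3.0, -1.0) = -1.0 because mask bit 0 is set; with a mask of
    // 0b0 it would be src[0] = 0.0 instead.
    _mm_cvtsd_f64(_mm_mask_max_round_sd::<_MM_FROUND_NO_EXC>(src, 0b1, a, b))
}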
36862
36863/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36864/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36865///
36866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
36867#[inline]
36868#[target_feature(enable = "avx512f")]
36869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36870#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
36871#[rustc_legacy_const_generics(2)]
36872pub unsafe fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
36873    static_assert_sae!(SAE);
36874    let a = a.as_f32x4();
36875    let b = b.as_f32x4();
36876    let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
36877    transmute(r)
36878}
36879
36880/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36881/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36882///
36883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
36884#[inline]
36885#[target_feature(enable = "avx512f")]
36886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36887#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
36888#[rustc_legacy_const_generics(4)]
36889pub unsafe fn _mm_mask_min_round_ss<const SAE: i32>(
36890    src: __m128,
36891    k: __mmask8,
36892    a: __m128,
36893    b: __m128,
36894) -> __m128 {
36895    static_assert_sae!(SAE);
36896    let a = a.as_f32x4();
36897    let b = b.as_f32x4();
36898    let src = src.as_f32x4();
36899    let r = vminss(a, b, src, k, SAE);
36900    transmute(r)
36901}
36902
36903/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
36904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36905///
36906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
36907#[inline]
36908#[target_feature(enable = "avx512f")]
36909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36910#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
36911#[rustc_legacy_const_generics(3)]
36912pub unsafe fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36913    static_assert_sae!(SAE);
36914    let a = a.as_f32x4();
36915    let b = b.as_f32x4();
36916    let r = vminss(a, b, f32x4::ZERO, k, SAE);
36917    transmute(r)
36918}
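
// Hedged sketch: min follows the same pattern as max above; only the comparison
// direction changes. The helper name is made up for illustration.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_min_round_ss() -> f32 {
    let a = _mm_set_ss(6.0);
    let b = _mm_set_ss(2.0);
    // Mask bit 0 set: low lane = min(6.0, 2.0) = 2.0.
    _mm_cvtss_f32(_mm_maskz_min_round_ss::<_MM_FROUND_NO_EXC>(0b1, a, b))
}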
36919
36920/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
36921/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36922///
36923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
36924#[inline]
36925#[target_feature(enable = "avx512f")]
36926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36927#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
36928#[rustc_legacy_const_generics(2)]
36929pub unsafe fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
36930    static_assert_sae!(SAE);
36931    let a = a.as_f64x2();
36932    let b = b.as_f64x2();
36933    let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
36934    transmute(r)
36935}
36936
36937/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36938/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36939///
36940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
36941#[inline]
36942#[target_feature(enable = "avx512f")]
36943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36944#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
36945#[rustc_legacy_const_generics(4)]
36946pub unsafe fn _mm_mask_min_round_sd<const SAE: i32>(
36947    src: __m128d,
36948    k: __mmask8,
36949    a: __m128d,
36950    b: __m128d,
36951) -> __m128d {
36952    static_assert_sae!(SAE);
36953    let a = a.as_f64x2();
36954    let b = b.as_f64x2();
36955    let src = src.as_f64x2();
36956    let r = vminsd(a, b, src, k, SAE);
36957    transmute(r)
36958}
36959
36960/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
36961/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36962///
36963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
36964#[inline]
36965#[target_feature(enable = "avx512f")]
36966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36967#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
36968#[rustc_legacy_const_generics(3)]
36969pub unsafe fn _mm_maskz_min_round_sd<const SAE: i32>(
36970    k: __mmask8,
36971    a: __m128d,
36972    b: __m128d,
36973) -> __m128d {
36974    static_assert_sae!(SAE);
36975    let a = a.as_f64x2();
36976    let b = b.as_f64x2();
36977    let r = vminsd(a, b, f64x2::ZERO, k, SAE);
36978    transmute(r)
36979}
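
// Sketch (hypothetical helper): double-precision minimum with exception reporting
// left enabled via _MM_FROUND_CUR_DIRECTION rather than suppressed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_min_round_sd() -> f64 {
    let a = _mm_set_sd(1.25);
    let b = _mm_set_sd(8.0);
    _mm_cvtsd_f64(_mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b)) // 1.25
}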
36980
36981/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
36982///
36983/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
36984/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
36985/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
36986/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
36987/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
36988/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
36989///
36990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
36991#[inline]
36992#[target_feature(enable = "avx512f")]
36993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36994#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
36995#[rustc_legacy_const_generics(2)]
36996pub unsafe fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
36997    static_assert_rounding!(ROUNDING);
36998    vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
36999}
37000
37001/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37002///
37003/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37004/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37005/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37006/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37007/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37008/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37009///
37010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
37011#[inline]
37012#[target_feature(enable = "avx512f")]
37013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37014#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
37015#[rustc_legacy_const_generics(4)]
37016pub unsafe fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
37017    src: __m128,
37018    k: __mmask8,
37019    a: __m128,
37020    b: __m128,
37021) -> __m128 {
37022    static_assert_rounding!(ROUNDING);
37023    vsqrtss(a, b, src, k, ROUNDING)
37024}
37025
37026/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37027///
37028/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37029/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37030/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37031/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37032/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37033/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37034///
37035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
37036#[inline]
37037#[target_feature(enable = "avx512f")]
37038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37039#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
37040#[rustc_legacy_const_generics(3)]
37041pub unsafe fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(
37042    k: __mmask8,
37043    a: __m128,
37044    b: __m128,
37045) -> __m128 {
37046    static_assert_rounding!(ROUNDING);
37047    vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
37048}
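
// Illustrative sketch: note that sqrt takes its operand from `b`; `a` only supplies
// the upper three lanes of the result. The helper name is not from the original source.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_sqrt_round_ss() -> f32 {
    let a = _mm_set_ps(3.0, 2.0, 1.0, 100.0); // a[0] is ignored by the sqrt itself
    let b = _mm_set_ss(9.0);
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    _mm_cvtss_f32(_mm_sqrt_round_ss::<R>(a, b)) // sqrt(9.0) = 3.0
}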
37049
37050/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37051///
37052/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37053/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37054/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37055/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37056/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37057/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37058///
37059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
37060#[inline]
37061#[target_feature(enable = "avx512f")]
37062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37063#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
37064#[rustc_legacy_const_generics(2)]
37065pub unsafe fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
37066    static_assert_rounding!(ROUNDING);
37067    vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
37068}
37069
37070/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37071///
37072/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37073/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37074/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37075/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37076/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37078///
37079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
37080#[inline]
37081#[target_feature(enable = "avx512f")]
37082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37083#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
37084#[rustc_legacy_const_generics(4)]
37085pub unsafe fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
37086    src: __m128d,
37087    k: __mmask8,
37088    a: __m128d,
37089    b: __m128d,
37090) -> __m128d {
37091    static_assert_rounding!(ROUNDING);
37092    vsqrtsd(a, b, src, k, ROUNDING)
37093}
37094
37095/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37096///
37097/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37098/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37099/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37103///
37104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
37105#[inline]
37106#[target_feature(enable = "avx512f")]
37107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37108#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
37109#[rustc_legacy_const_generics(3)]
37110pub unsafe fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
37111    k: __mmask8,
37112    a: __m128d,
37113    b: __m128d,
37114) -> __m128d {
37115    static_assert_rounding!(ROUNDING);
37116    vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
37117}
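
// Sketch only: zero-masked double-precision square root; the low lane survives only
// when mask bit 0 is set (the helper name is invented for this example).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_sqrt_round_sd() -> f64 {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(2.0);
    const R: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
    // Round-toward-zero only affects the last bit of an inexact result.
    _mm_cvtsd_f64(_mm_maskz_sqrt_round_sd::<R>(0b1, a, b)) // ~1.414..., truncated
}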
37118
37119/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37120/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37121///
37122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
37123#[inline]
37124#[target_feature(enable = "avx512f")]
37125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37126#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
37127#[rustc_legacy_const_generics(2)]
37128pub unsafe fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
37129    static_assert_sae!(SAE);
37130    let a = a.as_f32x4();
37131    let b = b.as_f32x4();
37132    let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
37133    transmute(r)
37134}
37135
37136/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37137/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37138///
37139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
37140#[inline]
37141#[target_feature(enable = "avx512f")]
37142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37143#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
37144#[rustc_legacy_const_generics(4)]
37145pub unsafe fn _mm_mask_getexp_round_ss<const SAE: i32>(
37146    src: __m128,
37147    k: __mmask8,
37148    a: __m128,
37149    b: __m128,
37150) -> __m128 {
37151    static_assert_sae!(SAE);
37152    let a = a.as_f32x4();
37153    let b = b.as_f32x4();
37154    let src = src.as_f32x4();
37155    let r = vgetexpss(a, b, src, k, SAE);
37156    transmute(r)
37157}
37158
37159/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37160/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37161///
37162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
37163#[inline]
37164#[target_feature(enable = "avx512f")]
37165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37166#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
37167#[rustc_legacy_const_generics(3)]
37168pub unsafe fn _mm_maskz_getexp_round_ss<const SAE: i32>(
37169    k: __mmask8,
37170    a: __m128,
37171    b: __m128,
37172) -> __m128 {
37173    static_assert_sae!(SAE);
37174    let a = a.as_f32x4();
37175    let b = b.as_f32x4();
37176    let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
37177    transmute(r)
37178}
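
// Minimal sketch (assumed helper name): getexp returns floor(log2(|b[0]|)) as an
// f32, so 20.0 (between 2^4 and 2^5) maps to 4.0.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getexp_round_ss() -> f32 {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(20.0);
    _mm_cvtss_f32(_mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b)) // 4.0
}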
37179
37180/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37181/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37182///
37183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
37184#[inline]
37185#[target_feature(enable = "avx512f")]
37186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37187#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
37188#[rustc_legacy_const_generics(2)]
37189pub unsafe fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
37190    static_assert_sae!(SAE);
37191    let a = a.as_f64x2();
37192    let b = b.as_f64x2();
37193    let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
37194    transmute(r)
37195}
37196
37197/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37198/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37199///
37200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
37201#[inline]
37202#[target_feature(enable = "avx512f")]
37203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37204#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
37205#[rustc_legacy_const_generics(4)]
37206pub unsafe fn _mm_mask_getexp_round_sd<const SAE: i32>(
37207    src: __m128d,
37208    k: __mmask8,
37209    a: __m128d,
37210    b: __m128d,
37211) -> __m128d {
37212    static_assert_sae!(SAE);
37213    let a = a.as_f64x2();
37214    let b = b.as_f64x2();
37215    let src = src.as_f64x2();
37216    let r = vgetexpsd(a, b, src, k, SAE);
37217    transmute(r)
37218}
37219
37220/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
37221/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37222///
37223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
37224#[inline]
37225#[target_feature(enable = "avx512f")]
37226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37227#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
37228#[rustc_legacy_const_generics(3)]
37229pub unsafe fn _mm_maskz_getexp_round_sd<const SAE: i32>(
37230    k: __mmask8,
37231    a: __m128d,
37232    b: __m128d,
37233) -> __m128d {
37234    static_assert_sae!(SAE);
37235    let a = a.as_f64x2();
37236    let b = b.as_f64x2();
37237    let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
37238    transmute(r)
37239}
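
// Illustrative double-precision counterpart of the getexp example above; the mask
// keeps `src`'s low lane when clear (the helper name is hypothetical).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_getexp_round_sd() -> f64 {
    let src = _mm_set_sd(-99.0);
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(0.75);
    // Mask bit 0 set: low lane = floor(log2(0.75)) = -1.0.
    _mm_cvtsd_f64(_mm_mask_getexp_round_sd::<_MM_FROUND_NO_EXC>(src, 0b1, a, b))
}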
37240
37241/// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37242/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37243///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37244///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37245///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37246///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37247/// The sign is determined by sc which can take the following values:\
37248///    _MM_MANT_SIGN_src     // sign = sign(src)\
37249///    _MM_MANT_SIGN_zero    // sign = 0\
37250///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37251/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37252///
37253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
37254#[inline]
37255#[target_feature(enable = "avx512f")]
37256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37257#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
37258#[rustc_legacy_const_generics(2, 3, 4)]
37259pub unsafe fn _mm_getmant_round_ss<
37260    const NORM: _MM_MANTISSA_NORM_ENUM,
37261    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37262    const SAE: i32,
37263>(
37264    a: __m128,
37265    b: __m128,
37266) -> __m128 {
37267    static_assert_uimm_bits!(NORM, 4);
37268    static_assert_uimm_bits!(SIGN, 2);
37269    static_assert_mantissas_sae!(SAE);
37270    let a = a.as_f32x4();
37271    let b = b.as_f32x4();
37272    let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
37273    transmute(r)
37274}
37275
37276/// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37277/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37278///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37279///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37280///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37281///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37282/// The sign is determined by sc which can take the following values:\
37283///    _MM_MANT_SIGN_src     // sign = sign(src)\
37284///    _MM_MANT_SIGN_zero    // sign = 0\
37285///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37286/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37287///
37288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
37289#[inline]
37290#[target_feature(enable = "avx512f")]
37291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37292#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
37293#[rustc_legacy_const_generics(4, 5, 6)]
37294pub unsafe fn _mm_mask_getmant_round_ss<
37295    const NORM: _MM_MANTISSA_NORM_ENUM,
37296    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37297    const SAE: i32,
37298>(
37299    src: __m128,
37300    k: __mmask8,
37301    a: __m128,
37302    b: __m128,
37303) -> __m128 {
37304    static_assert_uimm_bits!(NORM, 4);
37305    static_assert_uimm_bits!(SIGN, 2);
37306    static_assert_mantissas_sae!(SAE);
37307    let a = a.as_f32x4();
37308    let b = b.as_f32x4();
37309    let src = src.as_f32x4();
37310    let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
37311    transmute(r)
37312}
37313
37314/// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37315/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37316///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37317///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37318///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37319///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37320/// The sign is determined by sc which can take the following values:\
37321///    _MM_MANT_SIGN_src     // sign = sign(src)\
37322///    _MM_MANT_SIGN_zero    // sign = 0\
37323///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37324/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37325///
37326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
37327#[inline]
37328#[target_feature(enable = "avx512f")]
37329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37330#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
37331#[rustc_legacy_const_generics(3, 4, 5)]
37332pub unsafe fn _mm_maskz_getmant_round_ss<
37333    const NORM: _MM_MANTISSA_NORM_ENUM,
37334    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37335    const SAE: i32,
37336>(
37337    k: __mmask8,
37338    a: __m128,
37339    b: __m128,
37340) -> __m128 {
37341    static_assert_uimm_bits!(NORM, 4);
37342    static_assert_uimm_bits!(SIGN, 2);
37343    static_assert_mantissas_sae!(SAE);
37344    let a = a.as_f32x4();
37345    let b = b.as_f32x4();
37346    let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
37347    transmute(r)
37348}
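
// Hedged sketch of getmant (the helper name is made up). NORM = _MM_MANT_NORM_1_2
// scales the mantissa into [1, 2); the SIGN argument is passed as a literal 0 here,
// which corresponds to taking the sign from the source operand.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getmant_round_ss() -> f32 {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(12.0); // 12.0 = 1.5 * 2^3
    let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, 0, _MM_FROUND_NO_EXC>(a, b);
    _mm_cvtss_f32(r) // 1.5
}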
37349
37350/// Normalize the mantissa of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37351/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37352///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37353///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37354///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37355///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37356/// The sign is determined by sc which can take the following values:\
37357///    _MM_MANT_SIGN_src     // sign = sign(src)\
37358///    _MM_MANT_SIGN_zero    // sign = 0\
37359///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37360/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37361///
37362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
37363#[inline]
37364#[target_feature(enable = "avx512f")]
37365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37366#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
37367#[rustc_legacy_const_generics(2, 3, 4)]
37368pub unsafe fn _mm_getmant_round_sd<
37369    const NORM: _MM_MANTISSA_NORM_ENUM,
37370    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37371    const SAE: i32,
37372>(
37373    a: __m128d,
37374    b: __m128d,
37375) -> __m128d {
37376    static_assert_uimm_bits!(NORM, 4);
37377    static_assert_uimm_bits!(SIGN, 2);
37378    static_assert_mantissas_sae!(SAE);
37379    let a = a.as_f64x2();
37380    let b = b.as_f64x2();
37381    let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
37382    transmute(r)
37383}
37384
37385/// Normalize the mantissa of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37386/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37387///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37388///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37389///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37390///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37391/// The sign is determined by sc which can take the following values:\
37392///    _MM_MANT_SIGN_src     // sign = sign(src)\
37393///    _MM_MANT_SIGN_zero    // sign = 0\
37394///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37395/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37396///
37397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
37398#[inline]
37399#[target_feature(enable = "avx512f")]
37400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37401#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
37402#[rustc_legacy_const_generics(4, 5, 6)]
37403pub unsafe fn _mm_mask_getmant_round_sd<
37404    const NORM: _MM_MANTISSA_NORM_ENUM,
37405    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37406    const SAE: i32,
37407>(
37408    src: __m128d,
37409    k: __mmask8,
37410    a: __m128d,
37411    b: __m128d,
37412) -> __m128d {
37413    static_assert_uimm_bits!(NORM, 4);
37414    static_assert_uimm_bits!(SIGN, 2);
37415    static_assert_mantissas_sae!(SAE);
37416    let a = a.as_f64x2();
37417    let b = b.as_f64x2();
37418    let src = src.as_f64x2();
37419    let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
37420    transmute(r)
37421}
37422
37423/// Normalize the mantissa of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37424/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37425///    _MM_MANT_NORM_1_2     // interval [1, 2)\
37426///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
37427///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
37428///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37429/// The sign is determined by sc which can take the following values:\
37430///    _MM_MANT_SIGN_src     // sign = sign(src)\
37431///    _MM_MANT_SIGN_zero    // sign = 0\
37432///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
37433/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37434///
37435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
37436#[inline]
37437#[target_feature(enable = "avx512f")]
37438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37439#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
37440#[rustc_legacy_const_generics(3, 4, 5)]
37441pub unsafe fn _mm_maskz_getmant_round_sd<
37442    const NORM: _MM_MANTISSA_NORM_ENUM,
37443    const SIGN: _MM_MANTISSA_SIGN_ENUM,
37444    const SAE: i32,
37445>(
37446    k: __mmask8,
37447    a: __m128d,
37448    b: __m128d,
37449) -> __m128d {
37450    static_assert_uimm_bits!(NORM, 4);
37451    static_assert_uimm_bits!(SIGN, 2);
37452    static_assert_mantissas_sae!(SAE);
37453    let a = a.as_f64x2();
37454    let b = b.as_f64x2();
37455    let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
37456    transmute(r)
37457}
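
// Sketch (illustrative name): the zero-masked double-precision getmant, normalizing
// into [0.5, 1); the literal NORM value 2 corresponds to that interval in the table above.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_getmant_round_sd() -> f64 {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(12.0); // 12.0 = 0.75 * 2^4
    let r = _mm_maskz_getmant_round_sd::<2, 0, _MM_FROUND_NO_EXC>(0b1, a, b);
    _mm_cvtsd_f64(r) // 0.75
}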
37458
37459/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37460/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37461/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37462/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37463/// * [`_MM_FROUND_TO_POS_INF`] : round up
37464/// * [`_MM_FROUND_TO_ZERO`] : truncate
37465/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37466///
37467/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
37468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
37469#[inline]
37470#[target_feature(enable = "avx512f")]
37471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37472#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
37473#[rustc_legacy_const_generics(2, 3)]
37474pub unsafe fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
37475    a: __m128,
37476    b: __m128,
37477) -> __m128 {
37478    static_assert_uimm_bits!(IMM8, 8);
37479    static_assert_mantissas_sae!(SAE);
37480    let a = a.as_f32x4();
37481    let b = b.as_f32x4();
37482    let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
37483    transmute(r)
37484}
37485
37486/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37487/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37488/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37489/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37490/// * [`_MM_FROUND_TO_POS_INF`] : round up
37491/// * [`_MM_FROUND_TO_ZERO`] : truncate
37492/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37493///
37494/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
37495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
37496#[inline]
37497#[target_feature(enable = "avx512f")]
37498#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37499#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
37500#[rustc_legacy_const_generics(4, 5)]
37501pub unsafe fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
37502    src: __m128,
37503    k: __mmask8,
37504    a: __m128,
37505    b: __m128,
37506) -> __m128 {
37507    static_assert_uimm_bits!(IMM8, 8);
37508    static_assert_mantissas_sae!(SAE);
37509    let a = a.as_f32x4();
37510    let b = b.as_f32x4();
37511    let src = src.as_f32x4();
37512    let r = vrndscaless(a, b, src, k, IMM8, SAE);
37513    transmute(r)
37514}
37515
37516/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37517/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37518/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37519/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37520/// * [`_MM_FROUND_TO_POS_INF`] : round up
37521/// * [`_MM_FROUND_TO_ZERO`] : truncate
37522/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37523///
37524/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
37525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
37526#[inline]
37527#[target_feature(enable = "avx512f")]
37528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37529#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
37530#[rustc_legacy_const_generics(3, 4)]
37531pub unsafe fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
37532    k: __mmask8,
37533    a: __m128,
37534    b: __m128,
37535) -> __m128 {
37536    static_assert_uimm_bits!(IMM8, 8);
37537    static_assert_mantissas_sae!(SAE);
37538    let a = a.as_f32x4();
37539    let b = b.as_f32x4();
37540    let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
37541    transmute(r)
37542}
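
// Minimal sketch (the helper name is invented): IMM8 = 0 requests zero fraction bits
// with round-to-nearest, i.e. rounding b's low lane to the nearest integer; SAE only
// controls exception suppression.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_roundscale_round_ss() -> f32 {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(2.7);
    _mm_cvtss_f32(_mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b)) // 3.0
}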
37543
37544/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37545/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37546/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37547/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37548/// * [`_MM_FROUND_TO_POS_INF`] : round up
37549/// * [`_MM_FROUND_TO_ZERO`] : truncate
37550/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37551///
37552/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
37553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
37554#[inline]
37555#[target_feature(enable = "avx512f")]
37556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37557#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
37558#[rustc_legacy_const_generics(2, 3)]
37559pub unsafe fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
37560    a: __m128d,
37561    b: __m128d,
37562) -> __m128d {
37563    static_assert_uimm_bits!(IMM8, 8);
37564    static_assert_mantissas_sae!(SAE);
37565    let a = a.as_f64x2();
37566    let b = b.as_f64x2();
37567    let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
37568    transmute(r)
37569}
37570
37571/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37572/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37573/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37574/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37575/// * [`_MM_FROUND_TO_POS_INF`] : round up
37576/// * [`_MM_FROUND_TO_ZERO`] : truncate
37577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37578///
37579/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
37580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
37581#[inline]
37582#[target_feature(enable = "avx512f")]
37583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37584#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
37585#[rustc_legacy_const_generics(4, 5)]
37586pub unsafe fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
37587    src: __m128d,
37588    k: __mmask8,
37589    a: __m128d,
37590    b: __m128d,
37591) -> __m128d {
37592    static_assert_uimm_bits!(IMM8, 8);
37593    static_assert_mantissas_sae!(SAE);
37594    let a = a.as_f64x2();
37595    let b = b.as_f64x2();
37596    let src = src.as_f64x2();
37597    let r = vrndscalesd(a, b, src, k, IMM8, SAE);
37598    transmute(r)
37599}
37600
37601/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37602/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37603/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37604/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37605/// * [`_MM_FROUND_TO_POS_INF`] : round up
37606/// * [`_MM_FROUND_TO_ZERO`] : truncate
37607/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37608///
37609/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
37610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
37611#[inline]
37612#[target_feature(enable = "avx512f")]
37613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37614#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
37615#[rustc_legacy_const_generics(3, 4)]
37616pub unsafe fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
37617    k: __mmask8,
37618    a: __m128d,
37619    b: __m128d,
37620) -> __m128d {
37621    static_assert_uimm_bits!(IMM8, 8);
37622    static_assert_mantissas_sae!(SAE);
37623    let a = a.as_f64x2();
37624    let b = b.as_f64x2();
37625    let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
37626    transmute(r)
37627}
37628
37629/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37630///
37631/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37632/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37633/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37634/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37635/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37636/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37637///
37638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
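///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation); the
/// lower lane of the result is `a * 2^floor(b)`. It assumes an AVX-512F-capable
/// CPU and the unstable `stdarch_x86_avx512` feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.5); // lower lane = 1.5
///     let b = _mm_set_ps(0.0, 0.0, 0.0, 3.0); // floor(3.0) = 3, so scale by 2^3
///     let r = _mm_scalef_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
///     // lower lane: 1.5 * 8.0 = 12.0; the upper three lanes are copied from a.
/// }
/// ```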
37639#[inline]
37640#[target_feature(enable = "avx512f")]
37641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37642#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
37643#[rustc_legacy_const_generics(2)]
37644pub unsafe fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37645    static_assert_rounding!(ROUNDING);
37646    let a = a.as_f32x4();
37647    let b = b.as_f32x4();
37648    let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
37649    transmute(r)
37650}
37651
37652/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37653///
37654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37660///
37661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
37662#[inline]
37663#[target_feature(enable = "avx512f")]
37664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37665#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
37666#[rustc_legacy_const_generics(4)]
37667pub unsafe fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
37668    src: __m128,
37669    k: __mmask8,
37670    a: __m128,
37671    b: __m128,
37672) -> __m128 {
37673    static_assert_rounding!(ROUNDING);
37674    let a = a.as_f32x4();
37675    let b = b.as_f32x4();
37676    let src = src.as_f32x4();
37677    let r = vscalefss(a, b, src, k, ROUNDING);
37678    transmute(r)
37679}
37680
37681/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37682///
37683/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37684/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37685/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37686/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37687/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37688/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37689///
37690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
37691#[inline]
37692#[target_feature(enable = "avx512f")]
37693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37694#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
37695#[rustc_legacy_const_generics(3)]
37696pub unsafe fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(
37697    k: __mmask8,
37698    a: __m128,
37699    b: __m128,
37700) -> __m128 {
37701    static_assert_rounding!(ROUNDING);
37702    let a = a.as_f32x4();
37703    let b = b.as_f32x4();
37704    let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
37705    transmute(r)
37706}
37707
37708/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37709///
37710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37716///
37717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
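///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation); the
/// lower lane of the result is `a * 2^floor(b)`. It assumes an AVX-512F-capable
/// CPU and the unstable `stdarch_x86_avx512` feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let a = _mm_set_pd(9.0, 1.5);  // lower lane = 1.5, upper lane = 9.0
///     let b = _mm_set_pd(0.0, -1.0); // floor(-1.0) = -1, so scale by 2^-1
///     let r = _mm_scalef_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
///     // r holds [0.75, 9.0]: lower lane scaled, upper lane copied from a.
/// }
/// ```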
37718#[inline]
37719#[target_feature(enable = "avx512f")]
37720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37721#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
37722#[rustc_legacy_const_generics(2)]
37723pub unsafe fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
37724    static_assert_rounding!(ROUNDING);
37725    let a = a.as_f64x2();
37726    let b = b.as_f64x2();
37727    let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
37728    transmute(r)
37729}
37730
37731/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37732///
37733/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37734/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37735/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37736/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37737/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37738/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37739///
37740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
37741#[inline]
37742#[target_feature(enable = "avx512f")]
37743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37744#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
37745#[rustc_legacy_const_generics(4)]
37746pub unsafe fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
37747    src: __m128d,
37748    k: __mmask8,
37749    a: __m128d,
37750    b: __m128d,
37751) -> __m128d {
    static_assert_rounding!(ROUNDING);
37752    let a = a.as_f64x2();
37753    let b = b.as_f64x2();
37754    let src = src.as_f64x2();
37755    let r = vscalefsd(a, b, src, k, ROUNDING);
37756    transmute(r)
37757}
37758
37759/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37760///
37761/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37762/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37763/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37764/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37765/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37766/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37767///
37768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
37769#[inline]
37770#[target_feature(enable = "avx512f")]
37771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37772#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
37773#[rustc_legacy_const_generics(3)]
37774pub unsafe fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
37775    k: __mmask8,
37776    a: __m128d,
37777    b: __m128d,
37778) -> __m128d {
37779    static_assert_rounding!(ROUNDING);
37780    let a = a.as_f64x2();
37781    let b = b.as_f64x2();
37782    let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
37783    transmute(r)
37784}
37785
37786/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37787///
37788/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37789/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37790/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37791/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37792/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37793/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37794///
37795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
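///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation); it
/// assumes an AVX-512F-capable CPU and the unstable `stdarch_x86_avx512`
/// feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Lower lane: 2.0 * 3.0 + 1.0 = 7.0, rounded to nearest with
///     // exceptions suppressed; the upper lanes are copied from a.
///     let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// }
/// ```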
37796#[inline]
37797#[target_feature(enable = "avx512f")]
37798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37799#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
37800#[rustc_legacy_const_generics(3)]
37801pub unsafe fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
37802    static_assert_rounding!(ROUNDING);
37803    let extracta: f32 = simd_extract!(a, 0);
37804    let extractb: f32 = simd_extract!(b, 0);
37805    let extractc: f32 = simd_extract!(c, 0);
37806    let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
37807    simd_insert!(a, 0, r)
37808}
37809
37810/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37811///
37812/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37813/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37814/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37815/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37816/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37817/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37818///
37819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
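///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation)
/// showing the writemask behaviour; it assumes an AVX-512F-capable CPU and
/// the unstable `stdarch_x86_avx512` feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(1.0));
///     // Mask bit 0 set: lower lane becomes 2.0 * 3.0 + 1.0 = 7.0.
///     let r1 = _mm_mask_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b1, b, c);
///     // Mask bit 0 clear: lower lane is copied unchanged from a (2.0).
///     let r0 = _mm_mask_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b0, b, c);
/// }
/// ```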
37820#[inline]
37821#[target_feature(enable = "avx512f")]
37822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37823#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
37824#[rustc_legacy_const_generics(4)]
37825pub unsafe fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
37826    a: __m128,
37827    k: __mmask8,
37828    b: __m128,
37829    c: __m128,
37830) -> __m128 {
37831    static_assert_rounding!(ROUNDING);
37832    let mut fmadd: f32 = simd_extract!(a, 0);
37833    if (k & 0b00000001) != 0 {
37834        let extractb: f32 = simd_extract!(b, 0);
37835        let extractc: f32 = simd_extract!(c, 0);
37836        fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
37837    }
37838    simd_insert!(a, 0, fmadd)
37839}
37840
37841/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37842///
37843/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37844/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37845/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37846/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37847/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37848/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37849///
37850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
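///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation)
/// showing the zeromask behaviour; it assumes an AVX-512F-capable CPU and
/// the unstable `stdarch_x86_avx512` feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(1.0));
///     // Mask bit 0 clear: the lower lane is zeroed rather than copied from a.
///     let r = _mm_maskz_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b0, a, b, c);
/// }
/// ```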
37851#[inline]
37852#[target_feature(enable = "avx512f")]
37853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37854#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
37855#[rustc_legacy_const_generics(4)]
37856pub unsafe fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
37857    k: __mmask8,
37858    a: __m128,
37859    b: __m128,
37860    c: __m128,
37861) -> __m128 {
37862    static_assert_rounding!(ROUNDING);
37863    let mut fmadd: f32 = 0.;
37864    if (k & 0b00000001) != 0 {
37865        let extracta: f32 = simd_extract!(a, 0);
37866        let extractb: f32 = simd_extract!(b, 0);
37867        let extractc: f32 = simd_extract!(c, 0);
37868        fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
37869    }
37870    simd_insert!(a, 0, fmadd)
37871}
37872
37873/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
37874///
37875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37881///
37882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
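///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation)
/// showing that the result is written into the `c` operand's lanes; it
/// assumes an AVX-512F-capable CPU and the unstable `stdarch_x86_avx512`
/// feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(1.0));
///     // Mask bit 0 set: lower lane = 2.0 * 3.0 + 1.0 = 7.0; with a clear
///     // mask bit it would keep c's original 1.0. Upper lanes come from c.
///     let r = _mm_mask3_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b1);
/// }
/// ```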
37883#[inline]
37884#[target_feature(enable = "avx512f")]
37885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37886#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
37887#[rustc_legacy_const_generics(4)]
37888pub unsafe fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
37889    a: __m128,
37890    b: __m128,
37891    c: __m128,
37892    k: __mmask8,
37893) -> __m128 {
37894    static_assert_rounding!(ROUNDING);
37895    let mut fmadd: f32 = simd_extract!(c, 0);
37896    if (k & 0b00000001) != 0 {
37897        let extracta: f32 = simd_extract!(a, 0);
37898        let extractb: f32 = simd_extract!(b, 0);
37899        fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
37900    }
37901    simd_insert!(c, 0, fmadd)
37902}
37903
37904/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37905///
37906/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37907/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37908/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37909/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37910/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37911/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37912///
37913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
37914#[inline]
37915#[target_feature(enable = "avx512f")]
37916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37917#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
37918#[rustc_legacy_const_generics(3)]
37919pub unsafe fn _mm_fmadd_round_sd<const ROUNDING: i32>(
37920    a: __m128d,
37921    b: __m128d,
37922    c: __m128d,
37923) -> __m128d {
37924    static_assert_rounding!(ROUNDING);
37925    let extracta: f64 = simd_extract!(a, 0);
37926    let extractb: f64 = simd_extract!(b, 0);
37927    let extractc: f64 = simd_extract!(c, 0);
37928    let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
37929    simd_insert!(a, 0, fmadd)
37930}
37931
37932/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37933///
37934/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37935/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37936/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37937/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37938/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37939/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37940///
37941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
37942#[inline]
37943#[target_feature(enable = "avx512f")]
37944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37945#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
37946#[rustc_legacy_const_generics(4)]
37947pub unsafe fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
37948    a: __m128d,
37949    k: __mmask8,
37950    b: __m128d,
37951    c: __m128d,
37952) -> __m128d {
37953    static_assert_rounding!(ROUNDING);
37954    let mut fmadd: f64 = simd_extract!(a, 0);
37955    if (k & 0b00000001) != 0 {
37956        let extractb: f64 = simd_extract!(b, 0);
37957        let extractc: f64 = simd_extract!(c, 0);
37958        fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
37959    }
37960    simd_insert!(a, 0, fmadd)
37961}
37962
37963/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37964///
37965/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37966/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37967/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37968/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37969/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37970/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37971///
37972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
37973#[inline]
37974#[target_feature(enable = "avx512f")]
37975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37976#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
37977#[rustc_legacy_const_generics(4)]
37978pub unsafe fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
37979    k: __mmask8,
37980    a: __m128d,
37981    b: __m128d,
37982    c: __m128d,
37983) -> __m128d {
37984    static_assert_rounding!(ROUNDING);
37985    let mut fmadd: f64 = 0.;
37986    if (k & 0b00000001) != 0 {
37987        let extracta: f64 = simd_extract!(a, 0);
37988        let extractb: f64 = simd_extract!(b, 0);
37989        let extractc: f64 = simd_extract!(c, 0);
37990        fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
37991    }
37992    simd_insert!(a, 0, fmadd)
37993}
37994
37995/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
37996///
37997/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37998/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37999/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38000/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38001/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38002/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38003///
38004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
38005#[inline]
38006#[target_feature(enable = "avx512f")]
38007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38008#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
38009#[rustc_legacy_const_generics(4)]
38010pub unsafe fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
38011    a: __m128d,
38012    b: __m128d,
38013    c: __m128d,
38014    k: __mmask8,
38015) -> __m128d {
38016    static_assert_rounding!(ROUNDING);
38017    let mut fmadd: f64 = simd_extract!(c, 0);
38018    if (k & 0b00000001) != 0 {
38019        let extracta: f64 = simd_extract!(a, 0);
38020        let extractb: f64 = simd_extract!(b, 0);
38021        fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
38022    }
38023    simd_insert!(c, 0, fmadd)
38024}
38025
38026/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38027///
38028/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38029/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38030/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38031/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38032/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38033/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38034///
38035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
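///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation); it
/// assumes an AVX-512F-capable CPU and the unstable `stdarch_x86_avx512`
/// feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(1.0));
///     // Lower lane: 2.0 * 3.0 - 1.0 = 5.0; the upper lanes are copied from a.
///     let r = _mm_fmsub_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c);
/// }
/// ```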
38036#[inline]
38037#[target_feature(enable = "avx512f")]
38038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38039#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38040#[rustc_legacy_const_generics(3)]
38041pub unsafe fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
38042    static_assert_rounding!(ROUNDING);
38043    let extracta: f32 = simd_extract!(a, 0);
38044    let extractb: f32 = simd_extract!(b, 0);
38045    let extractc: f32 = simd_extract!(c, 0);
38046    let extractc = -extractc;
38047    let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38048    simd_insert!(a, 0, fmsub)
38049}
38050
38051/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38052///
38053/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38054/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38055/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38056/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38057/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38058/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38059///
38060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
38061#[inline]
38062#[target_feature(enable = "avx512f")]
38063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38064#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38065#[rustc_legacy_const_generics(4)]
38066pub unsafe fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
38067    a: __m128,
38068    k: __mmask8,
38069    b: __m128,
38070    c: __m128,
38071) -> __m128 {
38072    static_assert_rounding!(ROUNDING);
38073    let mut fmsub: f32 = simd_extract!(a, 0);
38074    if (k & 0b00000001) != 0 {
38075        let extractb: f32 = simd_extract!(b, 0);
38076        let extractc: f32 = simd_extract!(c, 0);
38077        let extractc = -extractc;
38078        fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
38079    }
38080    simd_insert!(a, 0, fmsub)
38081}
38082
38083/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38084///
38085/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38086/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38087/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38088/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38089/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38091///
38092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
38093#[inline]
38094#[target_feature(enable = "avx512f")]
38095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38096#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38097#[rustc_legacy_const_generics(4)]
38098pub unsafe fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
38099    k: __mmask8,
38100    a: __m128,
38101    b: __m128,
38102    c: __m128,
38103) -> __m128 {
38104    static_assert_rounding!(ROUNDING);
38105    let mut fmsub: f32 = 0.;
38106    if (k & 0b00000001) != 0 {
38107        let extracta: f32 = simd_extract!(a, 0);
38108        let extractb: f32 = simd_extract!(b, 0);
38109        let extractc: f32 = simd_extract!(c, 0);
38110        let extractc = -extractc;
38111        fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38112    }
38113    simd_insert!(a, 0, fmsub)
38114}
38115
38116/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
38117///
38118/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38119/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38120/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38121/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38122/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38123/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38124///
38125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
38126#[inline]
38127#[target_feature(enable = "avx512f")]
38128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38129#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38130#[rustc_legacy_const_generics(4)]
38131pub unsafe fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
38132    a: __m128,
38133    b: __m128,
38134    c: __m128,
38135    k: __mmask8,
38136) -> __m128 {
38137    static_assert_rounding!(ROUNDING);
38138    let mut fmsub: f32 = simd_extract!(c, 0);
38139    if (k & 0b00000001) != 0 {
38140        let extracta: f32 = simd_extract!(a, 0);
38141        let extractb: f32 = simd_extract!(b, 0);
38142        let extractc = -fmsub;
38143        fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38144    }
38145    simd_insert!(c, 0, fmsub)
38146}
38147
38148/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38149///
38150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38156///
38157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
38158#[inline]
38159#[target_feature(enable = "avx512f")]
38160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38161#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38162#[rustc_legacy_const_generics(3)]
38163pub unsafe fn _mm_fmsub_round_sd<const ROUNDING: i32>(
38164    a: __m128d,
38165    b: __m128d,
38166    c: __m128d,
38167) -> __m128d {
38168    static_assert_rounding!(ROUNDING);
38169    let extracta: f64 = simd_extract!(a, 0);
38170    let extractb: f64 = simd_extract!(b, 0);
38171    let extractc: f64 = simd_extract!(c, 0);
38172    let extractc = -extractc;
38173    let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38174    simd_insert!(a, 0, fmsub)
38175}
38176
38177/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38178///
38179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38185///
38186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
38187#[inline]
38188#[target_feature(enable = "avx512f")]
38189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38190#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38191#[rustc_legacy_const_generics(4)]
38192pub unsafe fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
38193    a: __m128d,
38194    k: __mmask8,
38195    b: __m128d,
38196    c: __m128d,
38197) -> __m128d {
38198    static_assert_rounding!(ROUNDING);
38199    let mut fmsub: f64 = simd_extract!(a, 0);
38200    if (k & 0b00000001) != 0 {
38201        let extractb: f64 = simd_extract!(b, 0);
38202        let extractc: f64 = simd_extract!(c, 0);
38203        let extractc = -extractc;
38204        fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
38205    }
38206    simd_insert!(a, 0, fmsub)
38207}
38208
38209/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38210///
38211/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38212/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38213/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38214/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38215/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38216/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38217///
38218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
38219#[inline]
38220#[target_feature(enable = "avx512f")]
38221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38222#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38223#[rustc_legacy_const_generics(4)]
38224pub unsafe fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
38225    k: __mmask8,
38226    a: __m128d,
38227    b: __m128d,
38228    c: __m128d,
38229) -> __m128d {
38230    static_assert_rounding!(ROUNDING);
38231    let mut fmsub: f64 = 0.;
38232    if (k & 0b00000001) != 0 {
38233        let extracta: f64 = simd_extract!(a, 0);
38234        let extractb: f64 = simd_extract!(b, 0);
38235        let extractc: f64 = simd_extract!(c, 0);
38236        let extractc = -extractc;
38237        fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38238    }
38239    simd_insert!(a, 0, fmsub)
38240}
38241
38242/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
38243///
38244/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38245/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38246/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38247/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38248/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38249/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38250///
38251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
38252#[inline]
38253#[target_feature(enable = "avx512f")]
38254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38255#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
38256#[rustc_legacy_const_generics(4)]
38257pub unsafe fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
38258    a: __m128d,
38259    b: __m128d,
38260    c: __m128d,
38261    k: __mmask8,
38262) -> __m128d {
38263    static_assert_rounding!(ROUNDING);
38264    let mut fmsub: f64 = simd_extract!(c, 0);
38265    if (k & 0b00000001) != 0 {
38266        let extracta: f64 = simd_extract!(a, 0);
38267        let extractb: f64 = simd_extract!(b, 0);
38268        let extractc = -fmsub;
38269        fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38270    }
38271    simd_insert!(c, 0, fmsub)
38272}
38273
38274/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38275///
38276/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38277/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38278/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38279/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38280/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38281/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38282///
38283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
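///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation); it
/// assumes an AVX-512F-capable CPU and the unstable `stdarch_x86_avx512`
/// feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(10.0));
///     // Lower lane: -(2.0 * 3.0) + 10.0 = 4.0; the upper lanes are copied from a.
///     let r = _mm_fnmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c);
/// }
/// ```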
38284#[inline]
38285#[target_feature(enable = "avx512f")]
38286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38287#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38288#[rustc_legacy_const_generics(3)]
38289pub unsafe fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
38290    static_assert_rounding!(ROUNDING);
38291    let extracta: f32 = simd_extract!(a, 0);
38292    let extracta = -extracta;
38293    let extractb: f32 = simd_extract!(b, 0);
38294    let extractc: f32 = simd_extract!(c, 0);
38295    let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38296    simd_insert!(a, 0, fnmadd)
38297}
38298
38299/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38300///
38301/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38302/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38303/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38304/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38305/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38306/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38307///
38308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
38309#[inline]
38310#[target_feature(enable = "avx512f")]
38311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38312#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38313#[rustc_legacy_const_generics(4)]
38314pub unsafe fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
38315    a: __m128,
38316    k: __mmask8,
38317    b: __m128,
38318    c: __m128,
38319) -> __m128 {
38320    static_assert_rounding!(ROUNDING);
38321    let mut fnmadd: f32 = simd_extract!(a, 0);
38322    if (k & 0b00000001) != 0 {
38323        let extracta = -fnmadd;
38324        let extractb: f32 = simd_extract!(b, 0);
38325        let extractc: f32 = simd_extract!(c, 0);
38326        fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38327    }
38328    simd_insert!(a, 0, fnmadd)
38329}
38330
38331/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38332///
38333/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38334/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38335/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38336/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38337/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38338/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38339///
38340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
38341#[inline]
38342#[target_feature(enable = "avx512f")]
38343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38344#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38345#[rustc_legacy_const_generics(4)]
38346pub unsafe fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
38347    k: __mmask8,
38348    a: __m128,
38349    b: __m128,
38350    c: __m128,
38351) -> __m128 {
38352    static_assert_rounding!(ROUNDING);
38353    let mut fnmadd: f32 = 0.;
38354    if (k & 0b00000001) != 0 {
38355        let extracta: f32 = simd_extract!(a, 0);
38356        let extracta = -extracta;
38357        let extractb: f32 = simd_extract!(b, 0);
38358        let extractc: f32 = simd_extract!(c, 0);
38359        fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38360    }
38361    simd_insert!(a, 0, fnmadd)
38362}
38363
38364/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
38365///
38366/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38367/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38368/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38369/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38370/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38371/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38372///
38373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
38374#[inline]
38375#[target_feature(enable = "avx512f")]
38376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38377#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38378#[rustc_legacy_const_generics(4)]
38379pub unsafe fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
38380    a: __m128,
38381    b: __m128,
38382    c: __m128,
38383    k: __mmask8,
38384) -> __m128 {
38385    static_assert_rounding!(ROUNDING);
38386    let mut fnmadd: f32 = simd_extract!(c, 0);
38387    if (k & 0b00000001) != 0 {
38388        let extracta: f32 = simd_extract!(a, 0);
38389        let extracta = -extracta;
38390        let extractb: f32 = simd_extract!(b, 0);
38391        fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
38392    }
38393    simd_insert!(c, 0, fnmadd)
38394}
38395
38396/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38397///
38398/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38399/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38400/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38401/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38402/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38403/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38404///
38405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
38406#[inline]
38407#[target_feature(enable = "avx512f")]
38408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38409#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38410#[rustc_legacy_const_generics(3)]
38411pub unsafe fn _mm_fnmadd_round_sd<const ROUNDING: i32>(
38412    a: __m128d,
38413    b: __m128d,
38414    c: __m128d,
38415) -> __m128d {
38416    static_assert_rounding!(ROUNDING);
38417    let extracta: f64 = simd_extract!(a, 0);
38418    let extracta = -extracta;
38419    let extractb: f64 = simd_extract!(b, 0);
38420    let extractc: f64 = simd_extract!(c, 0);
38421    let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38422    simd_insert!(a, 0, fnmadd)
38423}
38424
38425/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38426///
38427/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38428/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38429/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38430/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38431/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38432/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38433///
38434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
38435#[inline]
38436#[target_feature(enable = "avx512f")]
38437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38438#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38439#[rustc_legacy_const_generics(4)]
38440pub unsafe fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
38441    a: __m128d,
38442    k: __mmask8,
38443    b: __m128d,
38444    c: __m128d,
38445) -> __m128d {
38446    static_assert_rounding!(ROUNDING);
38447    let mut fnmadd: f64 = simd_extract!(a, 0);
38448    if (k & 0b00000001) != 0 {
38449        let extracta = -fnmadd;
38450        let extractb: f64 = simd_extract!(b, 0);
38451        let extractc: f64 = simd_extract!(c, 0);
38452        fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38453    }
38454    simd_insert!(a, 0, fnmadd)
38455}
38456
38457/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38458///
38459/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38460/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38461/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38462/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38463/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38464/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38465///
38466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
38467#[inline]
38468#[target_feature(enable = "avx512f")]
38469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38470#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38471#[rustc_legacy_const_generics(4)]
38472pub unsafe fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
38473    k: __mmask8,
38474    a: __m128d,
38475    b: __m128d,
38476    c: __m128d,
38477) -> __m128d {
38478    static_assert_rounding!(ROUNDING);
38479    let mut fnmadd: f64 = 0.;
38480    if (k & 0b00000001) != 0 {
38481        let extracta: f64 = simd_extract!(a, 0);
38482        let extracta = -extracta;
38483        let extractb: f64 = simd_extract!(b, 0);
38484        let extractc: f64 = simd_extract!(c, 0);
38485        fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38486    }
38487    simd_insert!(a, 0, fnmadd)
38488}
38489
38490/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
38491///
38492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38498///
38499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
38500#[inline]
38501#[target_feature(enable = "avx512f")]
38502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38503#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
38504#[rustc_legacy_const_generics(4)]
38505pub unsafe fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
38506    a: __m128d,
38507    b: __m128d,
38508    c: __m128d,
38509    k: __mmask8,
38510) -> __m128d {
38511    static_assert_rounding!(ROUNDING);
38512    let mut fnmadd: f64 = simd_extract!(c, 0);
38513    if (k & 0b00000001) != 0 {
38514        let extracta: f64 = simd_extract!(a, 0);
38515        let extracta = -extracta;
38516        let extractb: f64 = simd_extract!(b, 0);
38517        fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
38518    }
38519    simd_insert!(c, 0, fnmadd)
38520}
38521
38522/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38523///
38524/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38525/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38526/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38527/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38528/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38529/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38530///
38531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
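///
/// # Example
///
/// A minimal illustrative sketch (not taken from Intel's documentation); it
/// assumes an AVX-512F-capable CPU and the unstable `stdarch_x86_avx512`
/// feature, so it is not compiled:
///
/// ```ignore
/// // Illustrative values only; requires AVX-512F at runtime.
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(1.0));
///     // Lower lane: -(2.0 * 3.0) - 1.0 = -7.0; the upper lanes are copied from a.
///     let r = _mm_fnmsub_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c);
/// }
/// ```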
38532#[inline]
38533#[target_feature(enable = "avx512f")]
38534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38535#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38536#[rustc_legacy_const_generics(3)]
38537pub unsafe fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
38538    static_assert_rounding!(ROUNDING);
38539    let extracta: f32 = simd_extract!(a, 0);
38540    let extracta = -extracta;
38541    let extractb: f32 = simd_extract!(b, 0);
38542    let extractc: f32 = simd_extract!(c, 0);
38543    let extractc = -extractc;
38544    let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38545    simd_insert!(a, 0, fnmsub)
38546}
38547
38548/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38549///
38550/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38551/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38552/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38553/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38554/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38555/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38556///
38557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
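///
/// A minimal sketch of the writemask behaviour (illustrative only; assumes an
/// `avx512f`-capable CPU and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(1.0));
/// // Mask bit 0 set: the lower lane is computed as -(2.0 * 3.0) - 1.0 == -7.0.
/// let hit = unsafe {
///     _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b1, b, c)
/// };
/// // Mask bit 0 clear: the lower lane is copied from `a` unchanged.
/// let miss = unsafe {
///     _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b0, b, c)
/// };
/// assert_eq!((_mm_cvtss_f32(hit), _mm_cvtss_f32(miss)), (-7.0, 2.0));
/// ```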
38558#[inline]
38559#[target_feature(enable = "avx512f")]
38560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38561#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38562#[rustc_legacy_const_generics(4)]
38563pub unsafe fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
38564    a: __m128,
38565    k: __mmask8,
38566    b: __m128,
38567    c: __m128,
38568) -> __m128 {
38569    static_assert_rounding!(ROUNDING);
38570    let mut fnmsub: f32 = simd_extract!(a, 0);
38571    if (k & 0b00000001) != 0 {
38572        let extracta = -fnmsub;
38573        let extractb: f32 = simd_extract!(b, 0);
38574        let extractc: f32 = simd_extract!(c, 0);
38575        let extractc = -extractc;
38576        fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38577    }
38578    simd_insert!(a, 0, fnmsub)
38579}
38580
38581/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38582///
38583/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38584/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38585/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38586/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38587/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38588/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38589///
38590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
38591#[inline]
38592#[target_feature(enable = "avx512f")]
38593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38594#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38595#[rustc_legacy_const_generics(4)]
38596pub unsafe fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
38597    k: __mmask8,
38598    a: __m128,
38599    b: __m128,
38600    c: __m128,
38601) -> __m128 {
38602    static_assert_rounding!(ROUNDING);
38603    let mut fnmsub: f32 = 0.;
38604    if (k & 0b00000001) != 0 {
38605        let extracta: f32 = simd_extract!(a, 0);
38606        let extracta = -extracta;
38607        let extractb: f32 = simd_extract!(b, 0);
38608        let extractc: f32 = simd_extract!(c, 0);
38609        let extractc = -extractc;
38610        fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38611    }
38612    simd_insert!(a, 0, fnmsub)
38613}
38614
38615/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
38616///
38617/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38623///
38624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
38625#[inline]
38626#[target_feature(enable = "avx512f")]
38627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38628#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38629#[rustc_legacy_const_generics(4)]
38630pub unsafe fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
38631    a: __m128,
38632    b: __m128,
38633    c: __m128,
38634    k: __mmask8,
38635) -> __m128 {
38636    static_assert_rounding!(ROUNDING);
38637    let mut fnmsub: f32 = simd_extract!(c, 0);
38638    if (k & 0b00000001) != 0 {
38639        let extracta: f32 = simd_extract!(a, 0);
38640        let extracta = -extracta;
38641        let extractb: f32 = simd_extract!(b, 0);
38642        let extractc = -fnmsub;
38643        fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
38644    }
38645    simd_insert!(c, 0, fnmsub)
38646}
38647
38648/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38649///
38650/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38651/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38652/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38653/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38654/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38655/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38656///
38657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
38658#[inline]
38659#[target_feature(enable = "avx512f")]
38660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38661#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38662#[rustc_legacy_const_generics(3)]
38663pub unsafe fn _mm_fnmsub_round_sd<const ROUNDING: i32>(
38664    a: __m128d,
38665    b: __m128d,
38666    c: __m128d,
38667) -> __m128d {
38668    static_assert_rounding!(ROUNDING);
38669    let extracta: f64 = simd_extract!(a, 0);
38670    let extracta = -extracta;
38671    let extractb: f64 = simd_extract!(b, 0);
38672    let extractc: f64 = simd_extract!(c, 0);
38673    let extractc = -extractc;
38674    let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38675    simd_insert!(a, 0, fnmsub)
38676}
38677
38678/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38679///
38680/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38681/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38682/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38683/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38684/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38685/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38686///
38687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
38688#[inline]
38689#[target_feature(enable = "avx512f")]
38690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38691#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38692#[rustc_legacy_const_generics(4)]
38693pub unsafe fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
38694    a: __m128d,
38695    k: __mmask8,
38696    b: __m128d,
38697    c: __m128d,
38698) -> __m128d {
38699    static_assert_rounding!(ROUNDING);
38700    let mut fnmsub: f64 = simd_extract!(a, 0);
38701    if (k & 0b00000001) != 0 {
38702        let extracta = -fnmsub;
38703        let extractb: f64 = simd_extract!(b, 0);
38704        let extractc: f64 = simd_extract!(c, 0);
38705        let extractc = -extractc;
38706        fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38707    }
38708    simd_insert!(a, 0, fnmsub)
38709}
38710
38711/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38712///
38713/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38714/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38715/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38716/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38717/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38718/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38719///
38720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
38721#[inline]
38722#[target_feature(enable = "avx512f")]
38723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38724#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38725#[rustc_legacy_const_generics(4)]
38726pub unsafe fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
38727    k: __mmask8,
38728    a: __m128d,
38729    b: __m128d,
38730    c: __m128d,
38731) -> __m128d {
38732    static_assert_rounding!(ROUNDING);
38733    let mut fnmsub: f64 = 0.;
38734    if (k & 0b00000001) != 0 {
38735        let extracta: f64 = simd_extract!(a, 0);
38736        let extracta = -extracta;
38737        let extractb: f64 = simd_extract!(b, 0);
38738        let extractc: f64 = simd_extract!(c, 0);
38739        let extractc = -extractc;
38740        fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38741    }
38742    simd_insert!(a, 0, fnmsub)
38743}
38744
38745/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
38746///
38747/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38748/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38749/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38750/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38751/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38752/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38753///
38754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
38755#[inline]
38756#[target_feature(enable = "avx512f")]
38757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38758#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
38759#[rustc_legacy_const_generics(4)]
38760pub unsafe fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
38761    a: __m128d,
38762    b: __m128d,
38763    c: __m128d,
38764    k: __mmask8,
38765) -> __m128d {
38766    static_assert_rounding!(ROUNDING);
38767    let mut fnmsub: f64 = simd_extract!(c, 0);
38768    if (k & 0b00000001) != 0 {
38769        let extracta: f64 = simd_extract!(a, 0);
38770        let extracta = -extracta;
38771        let extractb: f64 = simd_extract!(b, 0);
38772        let extractc = -fnmsub;
38773        fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
38774    }
38775    simd_insert!(c, 0, fnmsub)
38776}
38777
38778/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
38779///
38780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
38781#[inline]
38782#[target_feature(enable = "avx512f")]
38783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38784#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
38785#[rustc_legacy_const_generics(3)]
38786pub unsafe fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
38787    static_assert_uimm_bits!(IMM8, 8);
38788    let a = a.as_f32x4();
38789    let b = b.as_f32x4();
38790    let c = c.as_i32x4();
38791    let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
38792    let fixupimm: f32 = simd_extract!(r, 0);
38793    let r = simd_insert!(a, 0, fixupimm);
38794    transmute(r)
38795}
38796
38797/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
38798///
38799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
38800#[inline]
38801#[target_feature(enable = "avx512f")]
38802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38803#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
38804#[rustc_legacy_const_generics(4)]
38805pub unsafe fn _mm_mask_fixupimm_ss<const IMM8: i32>(
38806    a: __m128,
38807    k: __mmask8,
38808    b: __m128,
38809    c: __m128i,
38810) -> __m128 {
38811    static_assert_uimm_bits!(IMM8, 8);
38812    let a = a.as_f32x4();
38813    let b = b.as_f32x4();
38814    let c = c.as_i32x4();
38815    let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
38816    let fixupimm: f32 = simd_extract!(fixupimm, 0);
38817    let r = simd_insert!(a, 0, fixupimm);
38818    transmute(r)
38819}
38820
38821/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
38822///
38823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
38824#[inline]
38825#[target_feature(enable = "avx512f")]
38826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38827#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
38828#[rustc_legacy_const_generics(4)]
38829pub unsafe fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
38830    k: __mmask8,
38831    a: __m128,
38832    b: __m128,
38833    c: __m128i,
38834) -> __m128 {
38835    static_assert_uimm_bits!(IMM8, 8);
38836    let a = a.as_f32x4();
38837    let b = b.as_f32x4();
38838    let c = c.as_i32x4();
38839    let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
38840    let fixupimm: f32 = simd_extract!(fixupimm, 0);
38841    let r = simd_insert!(a, 0, fixupimm);
38842    transmute(r)
38843}
38844
38845/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
38846///
38847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
38848#[inline]
38849#[target_feature(enable = "avx512f")]
38850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38851#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
38852#[rustc_legacy_const_generics(3)]
38853pub unsafe fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
38854    static_assert_uimm_bits!(IMM8, 8);
38855    let a = a.as_f64x2();
38856    let b = b.as_f64x2();
38857    let c = c.as_i64x2();
38858    let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
38859    let fixupimm: f64 = simd_extract!(fixupimm, 0);
38860    let r = simd_insert!(a, 0, fixupimm);
38861    transmute(r)
38862}
38863
38864/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
38865///
38866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
38867#[inline]
38868#[target_feature(enable = "avx512f")]
38869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38870#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
38871#[rustc_legacy_const_generics(4)]
38872pub unsafe fn _mm_mask_fixupimm_sd<const IMM8: i32>(
38873    a: __m128d,
38874    k: __mmask8,
38875    b: __m128d,
38876    c: __m128i,
38877) -> __m128d {
38878    static_assert_uimm_bits!(IMM8, 8);
38879    let a = a.as_f64x2();
38880    let b = b.as_f64x2();
38881    let c = c.as_i64x2();
38882    let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
38883    let fixupimm: f64 = simd_extract!(fixupimm, 0);
38884    let r = simd_insert!(a, 0, fixupimm);
38885    transmute(r)
38886}
38887
38888/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
38889///
38890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
38891#[inline]
38892#[target_feature(enable = "avx512f")]
38893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38894#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
38895#[rustc_legacy_const_generics(4)]
38896pub unsafe fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
38897    k: __mmask8,
38898    a: __m128d,
38899    b: __m128d,
38900    c: __m128i,
38901) -> __m128d {
38902    static_assert_uimm_bits!(IMM8, 8);
38903    let a = a.as_f64x2();
38904    let b = b.as_f64x2();
38905    let c = c.as_i64x2();
38906    let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
38907    let fixupimm: f64 = simd_extract!(fixupimm, 0);
38908    let r = simd_insert!(a, 0, fixupimm);
38909    transmute(r)
38910}
38911
38912/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
38913/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38914///
38915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
38916#[inline]
38917#[target_feature(enable = "avx512f")]
38918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38919#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
38920#[rustc_legacy_const_generics(3, 4)]
38921pub unsafe fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
38922    a: __m128,
38923    b: __m128,
38924    c: __m128i,
38925) -> __m128 {
38926    static_assert_uimm_bits!(IMM8, 8);
38927    static_assert_mantissas_sae!(SAE);
38928    let a = a.as_f32x4();
38929    let b = b.as_f32x4();
38930    let c = c.as_i32x4();
38931    let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
38932    let fixupimm: f32 = simd_extract!(r, 0);
38933    let r = simd_insert!(a, 0, fixupimm);
38934    transmute(r)
38935}
38936
38937/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
38938/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38939///
38940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
38941#[inline]
38942#[target_feature(enable = "avx512f")]
38943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38944#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
38945#[rustc_legacy_const_generics(4, 5)]
38946pub unsafe fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
38947    a: __m128,
38948    k: __mmask8,
38949    b: __m128,
38950    c: __m128i,
38951) -> __m128 {
38952    static_assert_uimm_bits!(IMM8, 8);
38953    static_assert_mantissas_sae!(SAE);
38954    let a = a.as_f32x4();
38955    let b = b.as_f32x4();
38956    let c = c.as_i32x4();
38957    let r = vfixupimmss(a, b, c, IMM8, k, SAE);
38958    let fixupimm: f32 = simd_extract!(r, 0);
38959    let r = simd_insert!(a, 0, fixupimm);
38960    transmute(r)
38961}
38962
38963/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
38964/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38965///
38966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
38967#[inline]
38968#[target_feature(enable = "avx512f")]
38969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38970#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
38971#[rustc_legacy_const_generics(4, 5)]
38972pub unsafe fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
38973    k: __mmask8,
38974    a: __m128,
38975    b: __m128,
38976    c: __m128i,
38977) -> __m128 {
38978    static_assert_uimm_bits!(IMM8, 8);
38979    static_assert_mantissas_sae!(SAE);
38980    let a = a.as_f32x4();
38981    let b = b.as_f32x4();
38982    let c = c.as_i32x4();
38983    let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
38984    let fixupimm: f32 = simd_extract!(r, 0);
38985    let r = simd_insert!(a, 0, fixupimm);
38986    transmute(r)
38987}
38988
38989/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
38990/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38991///
38992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
38993#[inline]
38994#[target_feature(enable = "avx512f")]
38995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38996#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
38997#[rustc_legacy_const_generics(3, 4)]
38998pub unsafe fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
38999    a: __m128d,
39000    b: __m128d,
39001    c: __m128i,
39002) -> __m128d {
39003    static_assert_uimm_bits!(IMM8, 8);
39004    static_assert_mantissas_sae!(SAE);
39005    let a = a.as_f64x2();
39006    let b = b.as_f64x2();
39007    let c = c.as_i64x2();
39008    let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
39009    let fixupimm: f64 = simd_extract!(r, 0);
39010    let r = simd_insert!(a, 0, fixupimm);
39011    transmute(r)
39012}
39013
39014/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
39015/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39016///
39017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
39018#[inline]
39019#[target_feature(enable = "avx512f")]
39020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39021#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
39022#[rustc_legacy_const_generics(4, 5)]
39023pub unsafe fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
39024    a: __m128d,
39025    k: __mmask8,
39026    b: __m128d,
39027    c: __m128i,
39028) -> __m128d {
39029    static_assert_uimm_bits!(IMM8, 8);
39030    static_assert_mantissas_sae!(SAE);
39031    let a = a.as_f64x2();
39032    let b = b.as_f64x2();
39033    let c = c.as_i64x2();
39034    let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
39035    let fixupimm: f64 = simd_extract!(r, 0);
39036    let r = simd_insert!(a, 0, fixupimm);
39037    transmute(r)
39038}
39039
39040/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
39041/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39042///
39043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
39044#[inline]
39045#[target_feature(enable = "avx512f")]
39046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39047#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
39048#[rustc_legacy_const_generics(4, 5)]
39049pub unsafe fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
39050    k: __mmask8,
39051    a: __m128d,
39052    b: __m128d,
39053    c: __m128i,
39054) -> __m128d {
39055    static_assert_uimm_bits!(IMM8, 8);
39056    static_assert_mantissas_sae!(SAE);
39057    let a = a.as_f64x2();
39058    let b = b.as_f64x2();
39059    let c = c.as_i64x2();
39060    let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
39061    let fixupimm: f64 = simd_extract!(r, 0);
39062    let r = simd_insert!(a, 0, fixupimm);
39063    transmute(r)
39064}
39065
39066/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39067///
39068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
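///
/// A minimal sketch of the writemask behaviour (illustrative only; assumes an
/// `avx512f`-capable CPU and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let src = _mm_set_sd(9.0);
/// let a = _mm_set_pd(5.0, 1.0); // upper = 5.0, lower = 1.0
/// let b = _mm_set_ss(2.5);
/// // Mask bit 0 set: the lower lane is b's lower element widened to f64.
/// let hit = unsafe { _mm_mask_cvtss_sd(src, 0b1, a, b) };
/// // Mask bit 0 clear: the lower lane is copied from src instead.
/// let miss = unsafe { _mm_mask_cvtss_sd(src, 0b0, a, b) };
/// assert_eq!((_mm_cvtsd_f64(hit), _mm_cvtsd_f64(miss)), (2.5, 9.0));
/// ```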
39069#[inline]
39070#[target_feature(enable = "avx512f")]
39071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39072#[cfg_attr(test, assert_instr(vcvtss2sd))]
39073pub unsafe fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
39074    transmute(vcvtss2sd(
39075        a.as_f64x2(),
39076        b.as_f32x4(),
39077        src.as_f64x2(),
39078        k,
39079        _MM_FROUND_CUR_DIRECTION,
39080    ))
39081}
39082
39083/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
39084///
39085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
39086#[inline]
39087#[target_feature(enable = "avx512f")]
39088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39089#[cfg_attr(test, assert_instr(vcvtss2sd))]
39090pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
39091    transmute(vcvtss2sd(
39092        a.as_f64x2(),
39093        b.as_f32x4(),
39094        f64x2::ZERO,
39095        k,
39096        _MM_FROUND_CUR_DIRECTION,
39097    ))
39098}
39099
39100/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39101///
39102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
39103#[inline]
39104#[target_feature(enable = "avx512f")]
39105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39106#[cfg_attr(test, assert_instr(vcvtsd2ss))]
39107pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
39108    transmute(vcvtsd2ss(
39109        a.as_f32x4(),
39110        b.as_f64x2(),
39111        src.as_f32x4(),
39112        k,
39113        _MM_FROUND_CUR_DIRECTION,
39114    ))
39115}
39116
39117/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
39118///
39119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
39120#[inline]
39121#[target_feature(enable = "avx512f")]
39122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39123#[cfg_attr(test, assert_instr(vcvtsd2ss))]
39124pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
39125    transmute(vcvtsd2ss(
39126        a.as_f32x4(),
39127        b.as_f64x2(),
39128        f32x4::ZERO,
39129        k,
39130        _MM_FROUND_CUR_DIRECTION,
39131    ))
39132}
39133
39134/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39135/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39136///
39137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
39138#[inline]
39139#[target_feature(enable = "avx512f")]
39140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39141#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
39142#[rustc_legacy_const_generics(2)]
39143pub unsafe fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
39144    static_assert_sae!(SAE);
39145    let a = a.as_f64x2();
39146    let b = b.as_f32x4();
39147    let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
39148    transmute(r)
39149}
39150
39151/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39152/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39153///
39154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
39155#[inline]
39156#[target_feature(enable = "avx512f")]
39157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39158#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
39159#[rustc_legacy_const_generics(4)]
39160pub unsafe fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
39161    src: __m128d,
39162    k: __mmask8,
39163    a: __m128d,
39164    b: __m128,
39165) -> __m128d {
39166    static_assert_sae!(SAE);
39167    let a = a.as_f64x2();
39168    let b = b.as_f32x4();
39169    let src = src.as_f64x2();
39170    let r = vcvtss2sd(a, b, src, k, SAE);
39171    transmute(r)
39172}
39173
39174/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39175/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39176///
39177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
39178#[inline]
39179#[target_feature(enable = "avx512f")]
39180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39181#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
39182#[rustc_legacy_const_generics(3)]
39183pub unsafe fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(
39184    k: __mmask8,
39185    a: __m128d,
39186    b: __m128,
39187) -> __m128d {
39188    static_assert_sae!(SAE);
39189    let a = a.as_f64x2();
39190    let b = b.as_f32x4();
39191    let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
39192    transmute(r)
39193}
39194
39195/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39196/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39197/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39198/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39199/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39200/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39201/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39202///
39203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
39204#[inline]
39205#[target_feature(enable = "avx512f")]
39206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39207#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
39208#[rustc_legacy_const_generics(2)]
39209pub unsafe fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
39210    static_assert_rounding!(ROUNDING);
39211    let a = a.as_f32x4();
39212    let b = b.as_f64x2();
39213    let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
39214    transmute(r)
39215}
39216
39217/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39218/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39219/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39220/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39221/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39222/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39223/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39224///
39225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
39226#[inline]
39227#[target_feature(enable = "avx512f")]
39228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39229#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
39230#[rustc_legacy_const_generics(4)]
39231pub unsafe fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
39232    src: __m128,
39233    k: __mmask8,
39234    a: __m128,
39235    b: __m128d,
39236) -> __m128 {
39237    static_assert_rounding!(ROUNDING);
39238    let a = a.as_f32x4();
39239    let b = b.as_f64x2();
39240    let src = src.as_f32x4();
39241    let r = vcvtsd2ss(a, b, src, k, ROUNDING);
39242    transmute(r)
39243}
39244
39245/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39252///
39253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
39254#[inline]
39255#[target_feature(enable = "avx512f")]
39256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39257#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
39258#[rustc_legacy_const_generics(3)]
39259pub unsafe fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(
39260    k: __mmask8,
39261    a: __m128,
39262    b: __m128d,
39263) -> __m128 {
39264    static_assert_rounding!(ROUNDING);
39265    let a = a.as_f32x4();
39266    let b = b.as_f64x2();
39267    let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
39268    transmute(r)
39269}
39270
39271/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39272/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39273/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39274/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39275/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39276/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39277/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39278///
39279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
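///
/// A minimal sketch of how the rounding mode affects the result (illustrative only;
/// assumes an `avx512f`-capable CPU and the unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set_ss(2.5);
/// // Round to nearest (ties to even) yields 2; rounding toward +infinity yields 3.
/// let nearest = unsafe {
///     _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// };
/// let up = unsafe {
///     _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
/// };
/// assert_eq!((nearest, up), (2, 3));
/// ```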
39280#[inline]
39281#[target_feature(enable = "avx512f")]
39282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39283#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
39284#[rustc_legacy_const_generics(1)]
39285pub unsafe fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
39286    static_assert_rounding!(ROUNDING);
39287    let a = a.as_f32x4();
39288    vcvtss2si(a, ROUNDING)
39289}
39290
39291/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39292/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39293/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39294/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39295/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39296/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39297/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39298///
39299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
39300#[inline]
39301#[target_feature(enable = "avx512f")]
39302#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39303#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
39304#[rustc_legacy_const_generics(1)]
39305pub unsafe fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
39306    static_assert_rounding!(ROUNDING);
39307    let a = a.as_f32x4();
39308    vcvtss2si(a, ROUNDING)
39309}
39310
39311/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
39312/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39313/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39314/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39315/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39316/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39317/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39318///
39319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
39320#[inline]
39321#[target_feature(enable = "avx512f")]
39322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39323#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
39324#[rustc_legacy_const_generics(1)]
39325pub unsafe fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
39326    static_assert_rounding!(ROUNDING);
39327    let a = a.as_f32x4();
39328    vcvtss2usi(a, ROUNDING)
39329}
39330
39331/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
39332///
39333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
39334#[inline]
39335#[target_feature(enable = "avx512f")]
39336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39337#[cfg_attr(test, assert_instr(vcvtss2si))]
39338pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 {
39339    vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
39340}
39341
39342/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
39343///
39344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
39345#[inline]
39346#[target_feature(enable = "avx512f")]
39347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39348#[cfg_attr(test, assert_instr(vcvtss2usi))]
39349pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 {
39350    vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
39351}
39352
39353/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39354/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39355/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39356/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39357/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39358/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39359/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39360///
39361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
39362#[inline]
39363#[target_feature(enable = "avx512f")]
39364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39365#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
39366#[rustc_legacy_const_generics(1)]
39367pub unsafe fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
39368    static_assert_rounding!(ROUNDING);
39369    let a = a.as_f64x2();
39370    vcvtsd2si(a, ROUNDING)
39371}
39372
39373/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
39374/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39375/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39376/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39377/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39378/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39379/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39380///
39381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
39382#[inline]
39383#[target_feature(enable = "avx512f")]
39384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39385#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
39386#[rustc_legacy_const_generics(1)]
39387pub unsafe fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
39388    static_assert_rounding!(ROUNDING);
39389    let a = a.as_f64x2();
39390    vcvtsd2si(a, ROUNDING)
39391}
39392
39393/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
39394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39400///
39401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
39402#[inline]
39403#[target_feature(enable = "avx512f")]
39404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39405#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
39406#[rustc_legacy_const_generics(1)]
39407pub unsafe fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
39408    static_assert_rounding!(ROUNDING);
39409    let a = a.as_f64x2();
39410    vcvtsd2usi(a, ROUNDING)
39411}
39412
39413/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
39414///
39415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
39416#[inline]
39417#[target_feature(enable = "avx512f")]
39418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39419#[cfg_attr(test, assert_instr(vcvtsd2si))]
39420pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 {
39421    vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
39422}
39423
39424/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
39425///
39426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
39427#[inline]
39428#[target_feature(enable = "avx512f")]
39429#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39430#[cfg_attr(test, assert_instr(vcvtsd2usi))]
39431pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 {
39432    vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
39433}
39434
39435/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39436///
39437/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39438/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39439/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39440/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39441/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39442/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39443///
39444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
39445#[inline]
39446#[target_feature(enable = "avx512f")]
39447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39448#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
39449#[rustc_legacy_const_generics(2)]
39450pub unsafe fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
39451    static_assert_rounding!(ROUNDING);
39452    let a = a.as_f32x4();
39453    let r = vcvtsi2ss(a, b, ROUNDING);
39454    transmute(r)
39455}
39456
39457/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39458///
39459/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39460/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39461/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39462/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39463/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39464/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39465///
39466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
39467#[inline]
39468#[target_feature(enable = "avx512f")]
39469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39470#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
39471#[rustc_legacy_const_generics(2)]
39472pub unsafe fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
39473    static_assert_rounding!(ROUNDING);
39474    let a = a.as_f32x4();
39475    let r = vcvtsi2ss(a, b, ROUNDING);
39476    transmute(r)
39477}
39478
39479/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39480/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39481/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39482/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39483/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39484/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39486///
39487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
39488#[inline]
39489#[target_feature(enable = "avx512f")]
39490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39491#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
39492#[rustc_legacy_const_generics(2)]
39493pub unsafe fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
39494    static_assert_rounding!(ROUNDING);
39495    let a = a.as_f32x4();
39496    let r = vcvtusi2ss(a, b, ROUNDING);
39497    transmute(r)
39498}
39499
39500/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39501///
39502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
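///
/// A minimal sketch (illustrative only; assumes an `avx512f`-capable CPU and the
/// unstable `stdarch_x86_avx512` feature):
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lower lane = 1.0
/// let r = unsafe { _mm_cvti32_ss(a, 7) };
/// // The lower lane becomes 7.0; the upper three lanes keep a's values.
/// assert_eq!(_mm_cvtss_f32(r), 7.0);
/// ```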
39503#[inline]
39504#[target_feature(enable = "avx512f")]
39505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39506#[cfg_attr(test, assert_instr(vcvtsi2ss))]
39507pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
39508    let b = b as f32;
39509    simd_insert!(a, 0, b)
39510}
39511
39512/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
39513///
39514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
39515#[inline]
39516#[target_feature(enable = "avx512f")]
39517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39518#[cfg_attr(test, assert_instr(vcvtsi2sd))]
39519pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
39520    let b = b as f64;
39521    simd_insert!(a, 0, b)
39522}
39523
39524/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39526///
39527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
39528#[inline]
39529#[target_feature(enable = "avx512f")]
39530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39531#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
39532#[rustc_legacy_const_generics(1)]
39533pub unsafe fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
39534    static_assert_sae!(SAE);
39535    let a = a.as_f32x4();
39536    vcvttss2si(a, SAE)
39537}
39538
39539/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39540/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39541///
39542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_i32&expand=1934)
39543#[inline]
39544#[target_feature(enable = "avx512f")]
39545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39546#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
39547#[rustc_legacy_const_generics(1)]
39548pub unsafe fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
39549    static_assert_sae!(SAE);
39550    let a = a.as_f32x4();
39551    vcvttss2si(a, SAE)
39552}
39553
39554/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
39555/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39556///
39557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundss_u32&expand=1938)
39558#[inline]
39559#[target_feature(enable = "avx512f")]
39560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39561#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
39562#[rustc_legacy_const_generics(1)]
39563pub unsafe fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
39564    static_assert_sae!(SAE);
39565    let a = a.as_f32x4();
39566    vcvttss2usi(a, SAE)
39567}
39568
39569/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
39570///
39571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
39572#[inline]
39573#[target_feature(enable = "avx512f")]
39574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39575#[cfg_attr(test, assert_instr(vcvttss2si))]
39576pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 {
39577    vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
39578}
39579
39580/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
39581///
39582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
39583#[inline]
39584#[target_feature(enable = "avx512f")]
39585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39586#[cfg_attr(test, assert_instr(vcvttss2usi))]
39587pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 {
39588    vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
39589}
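// Hedged sketch (AVX-512F assumed): because the destination is unsigned, values above
// i32::MAX that still fit in a u32 convert without saturating.
//
//     let a = _mm_set1_ps(3_000_000_000.0); // exactly representable in f32, exceeds i32::MAX
//     assert_eq!(_mm_cvttss_u32(a), 3_000_000_000);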
39590
39591/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39592/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39593///
39594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
39595#[inline]
39596#[target_feature(enable = "avx512f")]
39597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39598#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
39599#[rustc_legacy_const_generics(1)]
39600pub unsafe fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
39601    static_assert_sae!(SAE);
39602    let a = a.as_f64x2();
39603    vcvttsd2si(a, SAE)
39604}
39605
39606/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
39607/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39608///
39609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
39610#[inline]
39611#[target_feature(enable = "avx512f")]
39612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39613#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
39614#[rustc_legacy_const_generics(1)]
39615pub unsafe fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
39616    static_assert_sae!(SAE);
39617    let a = a.as_f64x2();
39618    vcvttsd2si(a, SAE)
39619}
39620
39621/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
39622/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39623///
39624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_u32&expand=1932)
39625#[inline]
39626#[target_feature(enable = "avx512f")]
39627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39628#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
39629#[rustc_legacy_const_generics(1)]
39630pub unsafe fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
39631    static_assert_sae!(SAE);
39632    let a = a.as_f64x2();
39633    vcvttsd2usi(a, SAE)
39634}
39635
39636/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
39637///
39638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
39639#[inline]
39640#[target_feature(enable = "avx512f")]
39641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39642#[cfg_attr(test, assert_instr(vcvttsd2si))]
39643pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 {
39644    vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
39645}
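// Minimal sketch (AVX-512F assumed): the result is always truncated toward zero, independent
// of the current rounding mode.
//
//     let a = _mm_set_sd(2.99);
//     assert_eq!(_mm_cvttsd_i32(a), 2);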
39646
39647/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
39648///
39649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
39650#[inline]
39651#[target_feature(enable = "avx512f")]
39652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39653#[cfg_attr(test, assert_instr(vcvttsd2usi))]
39654pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 {
39655    vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
39656}
39657
39658/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
39659///
39660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
39661#[inline]
39662#[target_feature(enable = "avx512f")]
39663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39664#[cfg_attr(test, assert_instr(vcvtusi2ss))]
39665pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
39666    let b = b as f32;
39667    simd_insert!(a, 0, b)
39668}
39669
39670/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
39671///
39672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
39673#[inline]
39674#[target_feature(enable = "avx512f")]
39675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39676#[cfg_attr(test, assert_instr(vcvtusi2sd))]
39677pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
39678    let b = b as f64;
39679    simd_insert!(a, 0, b)
39680}
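// Illustrative sketch (AVX-512F assumed): the unsigned variant matters when the source does
// not fit in an i32; the SSE2 _mm_cvtsi32_sd would interpret the same bits as a negative value.
//
//     let a = _mm_set_pd(1.0, 0.0);             // lanes are [0.0, 1.0]
//     let r = _mm_cvtu32_sd(a, 4_000_000_000);  // larger than i32::MAX
//     // r is [4000000000.0, 1.0]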
39681
39682/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
39683/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39684///
39685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
39686#[inline]
39687#[target_feature(enable = "avx512f")]
39688#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39689#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomiss
39690#[rustc_legacy_const_generics(2, 3)]
39691pub unsafe fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
39692    static_assert_uimm_bits!(IMM5, 5);
39693    static_assert_mantissas_sae!(SAE);
39694    let a = a.as_f32x4();
39695    let b = b.as_f32x4();
39696    vcomiss(a, b, IMM5, SAE)
39697}
39698
39699/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
39700/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39701///
39702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
39703#[inline]
39704#[target_feature(enable = "avx512f")]
39705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39706#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomisd
39707#[rustc_legacy_const_generics(2, 3)]
39708pub unsafe fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
39709    static_assert_uimm_bits!(IMM5, 5);
39710    static_assert_mantissas_sae!(SAE);
39711    let a = a.as_f64x2();
39712    let b = b.as_f64x2();
39713    vcomisd(a, b, IMM5, SAE)
39714}
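// Usage sketch (assumptions: AVX-512F detected; the predicate is one of the 5-bit `_CMP_*`
// constants shared with the AVX `cmp` intrinsics). The return value is 1 when the predicate
// holds for the lower elements and 0 otherwise.
//
//     let a = _mm_set_sd(1.0);
//     let b = _mm_set_sd(2.0);
//     let lt = _mm_comi_round_sd::<_CMP_LT_OQ, _MM_FROUND_NO_EXC>(a, b);
//     assert_eq!(lt, 1);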
39715
39716/// Equal
39717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39718pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
39719/// Less-than
39720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39721pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
39722/// Less-than-or-equal
39723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39724pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
39725/// False
39726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39727pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
39728/// Not-equal
39729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39730pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
39731/// Not less-than
39732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39733pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
39734/// Not less-than-or-equal
39735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39736pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
39737/// True
39738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39739pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
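// These predicates are passed as a const generic to the integer compare intrinsics in this
// module. A minimal sketch (AVX-512F assumed):
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let m: __mmask16 = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
//     assert_eq!(m, 0xFFFF); // every lane of `a` is less than the matching lane of `b`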
39740
39741/// interval [1, 2)
39742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39743pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
39744/// interval [0.5, 2)
39745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39746pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
39747/// interval [0.5, 1)
39748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39749pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
39750/// interval [0.75, 1.5)
39751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39752pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
39753
39754/// sign = sign(SRC)
39755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39756pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
39757/// sign = 0
39758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39759pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
39760/// DEST = NaN if sign(SRC) = 1
39761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39762pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
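// The norm/sign pairs above parameterize the `getmant` family. A hedged sketch (AVX-512F
// assumed): extract the mantissa of 8.0 normalized to the interval [1, 2), keeping the
// sign of the source.
//
//     let a = _mm512_set1_ps(8.0);
//     let r = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
//     // every lane of r is 1.0, because 8.0 = 1.0 * 2^3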
39763
39764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39765pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
39766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39767pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
39768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39769pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
39770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39771pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
39772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39773pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
39774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39775pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
39776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39777pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
39778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39779pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
39780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39781pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
39782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39783pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
39784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39785pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
39786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39787pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
39788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39789pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
39790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39791pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
39792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39793pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
39794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39795pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
39796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39797pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
39798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39799pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
39800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39801pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
39802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39803pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
39804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39805pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
39806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39807pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
39808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39809pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
39810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39811pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
39812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39813pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
39814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39815pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
39816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39817pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
39818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39819pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
39820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39821pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
39822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39823pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
39824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39825pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
39826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39827pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
39828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39829pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
39830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39831pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
39832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39833pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
39834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39835pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
39836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39837pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
39838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39839pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
39840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39841pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
39842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39843pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
39844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39845pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
39846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39847pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
39848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39849pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
39850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39851pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
39852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39853pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
39854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39855pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
39856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39857pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
39858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39859pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
39860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39861pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
39862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39863pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
39864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39865pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
39866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39867pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
39868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39869pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
39870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39871pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
39872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39873pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
39874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39875pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
39876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39877pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
39878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39879pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
39880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39881pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
39882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39883pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
39884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39885pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
39886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39887pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
39888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39889pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
39890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39891pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
39892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39893pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
39894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39895pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
39896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39897pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
39898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39899pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
39900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39901pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
39902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39903pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
39904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39905pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
39906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39907pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
39908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39909pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
39910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39911pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
39912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39913pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
39914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39915pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
39916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39917pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
39918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39919pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
39920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39921pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
39922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39923pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
39924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39925pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
39926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39927pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
39928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39929pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
39930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39931pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
39932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39933pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
39934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39935pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
39936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39937pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
39938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39939pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
39940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39941pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
39942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39943pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
39944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39945pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
39946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39947pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
39948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39949pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
39950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39951pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
39952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39953pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
39954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39955pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
39956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39957pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
39958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39959pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
39960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39961pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
39962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39963pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
39964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39965pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
39966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39967pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
39968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39969pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
39970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39971pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
39972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39973pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
39974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39975pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
39976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39977pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
39978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39979pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
39980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39981pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
39982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39983pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
39984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39985pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
39986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39987pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
39988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39989pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
39990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39991pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
39992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39993pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
39994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39995pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
39996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39997pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
39998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39999pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
40000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40001pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
40002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40003pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
40004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40005pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
40006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40007pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
40008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40009pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
40010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40011pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
40012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40013pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
40014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40015pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
40016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40017pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
40018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40019pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
40020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40021pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
40022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40023pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
40024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40025pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
40026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40027pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
40028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40029pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
40030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40031pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
40032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40033pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
40034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40035pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
40036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40037pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
40038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40039pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
40040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40041pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
40042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40043pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
40044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40045pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
40046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40047pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
40048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40049pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
40050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40051pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
40052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40053pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
40054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40055pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
40056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40057pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
40058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40059pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
40060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40061pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
40062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40063pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
40064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40065pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
40066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40067pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
40068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40069pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
40070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40071pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
40072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40073pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
40074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40075pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
40076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40077pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
40078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40079pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
40080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40081pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
40082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40083pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
40084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40085pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
40086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40087pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
40088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40089pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
40090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40091pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
40092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40093pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
40094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40095pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
40096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40097pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
40098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40099pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
40100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40101pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
40102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40103pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
40104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40105pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
40106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40107pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
40108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40109pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
40110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40111pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
40112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40113pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
40114#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40115pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
40116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40117pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
40118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40119pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
40120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40121pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
40122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40123pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
40124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40125pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
40126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40127pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
40128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40129pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
40130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40131pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
40132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40133pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
40134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40135pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
40136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40137pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
40138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40139pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
40140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40141pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
40142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40143pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
40144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40145pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
40146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40147pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
40148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40149pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
40150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40151pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
40152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40153pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
40154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40155pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
40156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40157pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
40158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40159pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
40160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40161pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
40162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40163pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
40164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40165pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
40166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40167pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
40168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40169pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
40170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40171pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
40172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40173pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
40174#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40175pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
40176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40177pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
40178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40179pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
40180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40181pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
40182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40183pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
40184#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40185pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
40186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40187pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
40188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40189pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
40190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40191pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
40192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40193pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
40194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40195pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
40196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40197pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
40198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40199pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
40200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40201pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
40202#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40203pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
40204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40205pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
40206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40207pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
40208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40209pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
40210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40211pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
40212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40213pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
40214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40215pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
40216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40217pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
40218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40219pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
40220#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40221pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
40222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40223pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
40224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40225pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
40226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40227pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
40228#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40229pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
40230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40231pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
40232#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40233pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
40234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40235pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
40236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40237pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
40238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40239pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
40240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40241pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
40242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40243pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
40244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40245pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
40246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40247pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
40248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40249pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
40250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40251pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
40252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40253pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
40254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40255pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
40256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40257pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
40258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40259pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
40260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40261pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
40262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40263pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
40264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40265pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
40266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40267pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
40268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40269pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
40270#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40271pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
40272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40273pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
40274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40275pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
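// Each `_MM_PERM_*` selector packs four 2-bit lane indices (A = 0 .. D = 3, listed from the
// highest destination lane to the lowest) and is consumed by the 32-bit shuffle intrinsics.
// A minimal sketch (AVX-512F assumed): swap adjacent pairs of 32-bit lanes within every
// 128-bit chunk.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let r = _mm512_shuffle_epi32::<_MM_PERM_BADC>(a);
//     // r = [2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13]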
40276
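// Declarations of the LLVM intrinsics that back the functions above. The `link_name`
// attribute selects the exact LLVM intrinsic, and the trailing `rounding`/`sae` integer
// arguments correspond to the immediate rounding/exception-control operands of the
// matching machine instructions.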
40277#[allow(improper_ctypes)]
40278extern "C" {
40279    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
40280    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
40281    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
40282    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
40283
40284    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
40285    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
40286    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
40287    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
40288
40289    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
40290    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
40291    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
40292    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
40293
40294    #[link_name = "llvm.x86.avx512.add.ps.512"]
40295    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40296    #[link_name = "llvm.x86.avx512.add.pd.512"]
40297    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40298    #[link_name = "llvm.x86.avx512.sub.ps.512"]
40299    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40300    #[link_name = "llvm.x86.avx512.sub.pd.512"]
40301    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40302    #[link_name = "llvm.x86.avx512.mul.ps.512"]
40303    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40304    #[link_name = "llvm.x86.avx512.mul.pd.512"]
40305    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40306    #[link_name = "llvm.x86.avx512.div.ps.512"]
40307    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
40308    #[link_name = "llvm.x86.avx512.div.pd.512"]
40309    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
40310
40311    #[link_name = "llvm.x86.avx512.max.ps.512"]
40312    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
40313    #[link_name = "llvm.x86.avx512.max.pd.512"]
40314    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
40315    #[link_name = "llvm.x86.avx512.min.ps.512"]
40316    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
40317    #[link_name = "llvm.x86.avx512.min.pd.512"]
40318    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
40319
40320    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
40321    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
40322
40323    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
40324    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
40325    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
40326    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
40327
40328    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
40329    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
40330    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
40331    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
40332    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
40333    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
40334
40335    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
40336    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
40337    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
40338    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
40339    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
40340    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
40341
40342    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
40343    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
40344    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
40345    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
40346    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
40347    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
40348
40349    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
40350    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
40351    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
40352    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
40353    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
40354    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
40355
40356    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
40357    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
40358    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
40359    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
40360    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
40361    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
40362
40363    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
40364    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
40365    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
40366    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
40367    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
40368    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
40369
40370    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
40371    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
40372    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
40373    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
40374    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
40375    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
40376
40377    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
40378    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
40379    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
40380    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
40381    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
40382    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
40383
40384    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
40385    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
40386    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
40387    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
40388    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
40389    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
40390
40391    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
40392    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
40393    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
40394    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
40395    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
40396    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
40397
40398    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
40399    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
40400    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
40401    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
40402    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
40403    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
40404
40405    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
40406    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
40407    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
40408    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
40409    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
40410    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
40411
40412    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
40413    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
40414    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
40415    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
40416    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
40417    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
40418
40419    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
40420    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
40421    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
40422    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
40423    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
40424    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
40425
40426    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
40427    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
40428    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
40429    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
40430    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
40431    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
40432
40433    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
40434    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
40435    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
40436    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
40437    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
40438    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
40439
40440    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
40441    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
40442    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
40443    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
40444    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
40445    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
40446
40447    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
40448    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
40449
40450    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
40451    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
40452    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
40453    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
40454    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
40455    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
40456
40457    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
40458    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
40459    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
40460    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
40461
40462    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
40463    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
40464
40465    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
40466    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
40467    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
40468    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
40469    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
40470    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
40471
40472    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
40473    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
40474    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
40475    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
40476
40477    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
40478    fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
40479    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
40480    fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
40481    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
40482    fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
40483
40484    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
40485    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
40486
40487    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
40488    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
40489    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
40490    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
40491    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
40492    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
40493
40494    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
40495    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
40496    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
40497    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
40498    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
40499    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
40500
40501    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
40502    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
40503    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
40504    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
40505    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
40506    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
40507
40508    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
40509    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
40510    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
40511    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
40512    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
40513    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
40514
40515    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
40516    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
40517    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
40518    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
40519    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
40520    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
40521
40522    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
40523    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
40524    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
40525    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
40526    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
40527    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
40528    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
40529    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
40530    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
40531    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
40532
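    // Down-conversions that store the narrowed result directly to memory; the plain,
    // `s` and `us` forms truncate, saturate signed and saturate unsigned, respectively.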
40533    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
40534    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40535    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
40536    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40537    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
40538    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40539
40540    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
40541    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40542    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
40543    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40544    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
40545    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40546
40547    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
40548    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40549    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
40550    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40551    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
40552    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40553
40554    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
40555    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40556    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
40557    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40558    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
40559    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40560
40561    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
40562    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40563    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
40564    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40565    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
40566    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40567
40568    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
40569    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
40570    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
40571    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
40572    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
40573    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
40574
40575    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
40576    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40577    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
40578    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40579    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
40580    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40581
40582    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
40583    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40584    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
40585    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40586    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
40587    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40588
40589    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
40590    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40591    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
40592    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40593    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
40594    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40595
40596    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
40597    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40598    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
40599    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40600    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
40601    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40602
40603    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
40604    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40605    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
40606    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40607    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
40608    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40609
40610    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
40611    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40612    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
40613    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40614    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
40615    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40616
40617    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
40618    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40619    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
40620    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40621    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
40622    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40623
40624    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
40625    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40626    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
40627    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40628    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
40629    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40630
40631    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
40632    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
40633    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
40634    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
40635    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
40636    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
40637
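    // Truncating qword-to-byte plus the signed- and unsigned-saturating register forms.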
40638    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
40639    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
40640
40641    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
40642    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
40643    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
40644    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
40645    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
40646    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
40647
40648    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
40649    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
40650    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
40651    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
40652    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
40653    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
40654
40655    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
40656    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
40657    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
40658    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
40659    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
40660    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
40661
40662    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
40663    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
40664    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
40665    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
40666    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
40667    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
40668
40669    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
40670    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
40671    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
40672    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
40673    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
40674    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
40675
40676    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
40677    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
40678    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
40679    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
40680    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
40681    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
40682
40683    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
40684    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
40685    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
40686    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
40687    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
40688    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
40689
40690    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
40691    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
40692    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
40693    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
40694    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
40695    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
40696
40697    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
40698    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
40699    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
40700    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
40701    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
40702    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
40703
40704    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
40705    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
40706    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
40707    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
40708    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
40709    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
40710
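    // 512-bit gathers: masked loads of elements addressed by `slice + offsets * scale`.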
40711    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
40712    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
40713    #[link_name = "llvm.x86.avx512.gather.dps.512"]
40714    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
40715    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
40716    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
40717    #[link_name = "llvm.x86.avx512.gather.qps.512"]
40718    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
40719    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
40720    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
40721    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
40722    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
40723    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
40724    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
40725    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
40726    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
40727
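    // 512-bit scatters: the store-side counterparts of the gathers above.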
40728    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
40729    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
40730    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
40731    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
40732    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
40733    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
40734    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
40735    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
40736    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
40737    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
40739    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
40740    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
40741    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
40742    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
40743    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
40744    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
40745
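    // 128-bit and 256-bit scatter variants.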
40746    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
40747    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
40748    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
40749    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
40750    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
40751    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
40752    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
40753    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
40754    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
40755    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
40756    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
40757    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
40758    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
40759    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
40760    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
40761    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
40762
40763    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
40764    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
40765    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
40766    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
40767    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
40768    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
40769    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
40770    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
40771    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
40772    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
40773    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
40774    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
40775    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
40776    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
40777    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
40778    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
40779
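    // 128-bit and 256-bit gather variants.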
40780    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
40781    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
40782    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
40783    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
40784    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
40785    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
40786    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
40787    fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
40788    #[link_name = "llvm.x86.avx512.gather3div4.si"]
40789    fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
40790    #[link_name = "llvm.x86.avx512.gather3div2.di"]
40791    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
40792    #[link_name = "llvm.x86.avx512.gather3div2.df"]
40793    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
40794    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
40795    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
40796
40797    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
40798    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
40799    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
40800    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
40801    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
40802    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
40803    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
40804    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
40805    #[link_name = "llvm.x86.avx512.gather3div8.si"]
40806    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
40807    #[link_name = "llvm.x86.avx512.gather3div4.di"]
40808    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
40809    #[link_name = "llvm.x86.avx512.gather3div4.df"]
40810    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
40811    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
40812    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
40813
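    // Comparisons: `op` selects the predicate and the result is a lane bitmask.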
40814    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
40815    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
40816    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
40817    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
40818
40819    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
40820    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
40821    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
40822    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
40823    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
40824    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
40825
40826    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
40827    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
40828    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
40829    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
40830    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
40831    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
40832
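    // Bit rotates by an immediate count (prol/pror) and by per-lane counts (prolv/prorv).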
40833    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
40834    fn vprold(a: i32x16, imm8: i32) -> i32x16;
40835    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
40836    fn vprold256(a: i32x8, imm8: i32) -> i32x8;
40837    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
40838    fn vprold128(a: i32x4, imm8: i32) -> i32x4;
40839
40840    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
40841    fn vprord(a: i32x16, imm8: i32) -> i32x16;
40842    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
40843    fn vprord256(a: i32x8, imm8: i32) -> i32x8;
40844    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
40845    fn vprord128(a: i32x4, imm8: i32) -> i32x4;
40846
40847    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
40848    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
40849    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
40850    fn vprolq256(a: i64x4, imm8: i32) -> i64x4;
40851    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
40852    fn vprolq128(a: i64x2, imm8: i32) -> i64x2;
40853
40854    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
40855    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
40856    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
40857    fn vprorq256(a: i64x4, imm8: i32) -> i64x4;
40858    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
40859    fn vprorq128(a: i64x2, imm8: i32) -> i64x2;
40860
40861    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
40862    fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
40863    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
40864    fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
40865    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
40866    fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
40867
40868    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
40869    fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
40870    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
40871    fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
40872    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
40873    fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
40874
40875    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
40876    fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
40877    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
40878    fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
40879    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
40880    fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
40881
40882    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
40883    fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
40884    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
40885    fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
40886    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
40887    fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
40888
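    // Shifts: per-lane variable counts (psllv/psrlv/psrav) and a uniform count supplied in an XMM register (psll/psrl/psra).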
40889    #[link_name = "llvm.x86.avx512.psllv.d.512"]
40890    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
40891    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
40892    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
40893    #[link_name = "llvm.x86.avx512.psllv.q.512"]
40894    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
40895    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
40896    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
40897
40898    #[link_name = "llvm.x86.avx512.psll.d.512"]
40899    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
40900    #[link_name = "llvm.x86.avx512.psrl.d.512"]
40901    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
40902    #[link_name = "llvm.x86.avx512.psll.q.512"]
40903    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
40904    #[link_name = "llvm.x86.avx512.psrl.q.512"]
40905    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
40906
40907    #[link_name = "llvm.x86.avx512.psra.d.512"]
40908    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
40909
40910    #[link_name = "llvm.x86.avx512.psra.q.512"]
40911    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
40912    #[link_name = "llvm.x86.avx512.psra.q.256"]
40913    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
40914    #[link_name = "llvm.x86.avx512.psra.q.128"]
40915    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
40916
40917    #[link_name = "llvm.x86.avx512.psrav.d.512"]
40918    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
40919
40920    #[link_name = "llvm.x86.avx512.psrav.q.512"]
40921    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
40922    #[link_name = "llvm.x86.avx512.psrav.q.256"]
40923    fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
40924    #[link_name = "llvm.x86.avx512.psrav.q.128"]
40925    fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
40926
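    // Permutes: in-lane (vpermilvar), full-width single-table (permvar) and two-table (vpermi2var).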
40927    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
40928    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
40929    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
40930    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
40931
40932    #[link_name = "llvm.x86.avx512.permvar.si.512"]
40933    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
40934
40935    #[link_name = "llvm.x86.avx512.permvar.di.512"]
40936    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
40937    #[link_name = "llvm.x86.avx512.permvar.di.256"]
40938    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
40939
40940    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
40941    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
40942
40943    #[link_name = "llvm.x86.avx512.permvar.df.512"]
40944    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
40945    #[link_name = "llvm.x86.avx512.permvar.df.256"]
40946    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
40947
40948    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
40949    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
40950    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
40951    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
40952    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
40953    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
40954
40955    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
40956    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
40957    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
40958    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
40959    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
40960    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
40961
40962    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
40963    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
40964    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
40965    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
40966    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
40967    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
40968
40969    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
40970    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
40971    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
40972    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
40973    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
40974    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
40975
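    // Compress: pack the mask-selected lanes contiguously toward the low end of the vector.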
40976    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
40977    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
40978    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
40979    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
40980    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
40981    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
40982
40983    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
40984    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
40985    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
40986    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
40987    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
40988    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
40989
40990    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
40991    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
40992    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
40993    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
40994    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
40995    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
40996
40997    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
40998    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
40999    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
41000    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
41001    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
41002    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
41003
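    // Compress-store: as above, but the packed lanes are written contiguously to memory.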
41004    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
41005    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
41006    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
41007    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
41008    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
41009    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
41010
41011    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
41012    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
41013    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
41014    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
41015    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
41016    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
41017
41018    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
41019    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
41020    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
41021    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
41022    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
41023    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
41024
41025    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
41026    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
41027    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
41028    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
41029    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
41030    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
41031
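    // Expand: the inverse of compress; contiguous source lanes are spread into the selected lanes.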
41032    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
41033    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
41034    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
41035    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
41036    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
41037    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
41038
41039    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
41040    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
41041    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
41042    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
41043    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
41044    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
41045
41046    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
41047    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
41048    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
41049    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
41050    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
41051    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
41052
41053    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
41054    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
41055    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
41056    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
41057    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
41058    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
41059
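    // Masked scalar (ss/sd) arithmetic; most forms take an explicit rounding or SAE argument.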
41060    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
41061    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41062    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
41063    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41064    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
41065    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41066    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
41067    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41068    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
41069    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41070    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
41071    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41072    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
41073    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41074    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
41075    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41076    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
41077    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
41078    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
41079    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
41080    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
41081    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
41082    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
41083    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
41084    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
41085    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
41086    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
41087    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
41088    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
41089    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
41090    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
41091    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
41092    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
41093    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
41094    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
41095    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
41096
41097    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
41098    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
41099    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
41100    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
41101    #[link_name = "llvm.x86.avx512.rcp14.ss"]
41102    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
41103    #[link_name = "llvm.x86.avx512.rcp14.sd"]
41104    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
41105
41106    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
41107    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
41108    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
41109    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
41110    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
41111    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41112    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
41113    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
41114
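    // Scalar fused multiply-add with rounding control, plus fixupimm special-value fixup.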
41115    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
41116    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
41117    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
41118    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;
41119
41120    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
41121    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
41122    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
41123    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
41124    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
41125    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
41126    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
41127    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
41128
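    // Scalar conversions between f32, f64 and signed/unsigned integers, plus scalar compares (vcomi).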
41129    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
41130    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
41131    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
41132    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
41133
41134    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
41135    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
41136    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
41137    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
41138
41139    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
41140    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
41141    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
41142    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
41143
41144    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
41145    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
41146    #[link_name = "llvm.x86.avx512.cvtsi2sd64"]
41147    fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2;
41148
41149    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
41150    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
41151    #[link_name = "llvm.x86.avx512.cvtusi642sd"]
41152    fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2;
41153
41154    #[link_name = "llvm.x86.avx512.cvttss2si"]
41155    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
41156    #[link_name = "llvm.x86.avx512.cvttss2usi"]
41157    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;
41158
41159    #[link_name = "llvm.x86.avx512.cvttsd2si"]
41160    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
41161    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
41162    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;
41163
41164    #[link_name = "llvm.x86.avx512.vcomi.ss"]
41165    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
41166    #[link_name = "llvm.x86.avx512.vcomi.sd"]
41167    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
41168
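    // Masked loads: unaligned (loadu) forms followed by aligned (load) forms.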
41169    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
41170    fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
41171    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
41172    fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
41173    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
41174    fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
41175    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
41176    fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
41177    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
41178    fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
41179    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
41180    fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
41181    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
41182    fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
41183    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
41184    fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
41185    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
41186    fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
41187    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
41188    fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
41189    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
41190    fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
41191    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
41192    fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
41193
41194    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
41195    fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
41196    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
41197    fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
41198    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
41199    fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
41200    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
41201    fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
41202    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
41203    fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
41204    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
41205    fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
41206    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
41207    fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
41208    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
41209    fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
41210    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
41211    fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
41212    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
41213    fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
41214    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
41215    fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
41216    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
41217    fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
41218
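    // Masked stores, mirroring the loads above.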
41219    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
41220    fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
41221    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
41222    fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
41223    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
41224    fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
41225    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
41226    fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
41227    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
41228    fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
41229    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
41230    fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
41231    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
41232    fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
41233    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
41234    fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
41235    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
41236    fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
41237    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
41238    fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
41239    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
41240    fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
41241    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
41242    fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
41243
41244    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
41245    fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
41246    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
41247    fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
41248    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
41249    fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
41250    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
41251    fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
41252    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
41253    fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
41254    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
41255    fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
41256    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
41257    fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
41258    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
41259    fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
41260    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
41261    fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
41262    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
41263    fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
41264    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
41265    fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
41266    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
41267    fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
41268
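    // Expand-loads: read one element from memory per set mask bit and expand them into the selected lanes.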
41269    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
41270    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
41271    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
41272    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
41273    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
41274    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
41275    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
41276    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
41277    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
41278    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
41279    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
41280    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
41281    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
41282    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
41283    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
41284    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
41285    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
41286    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
41287    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
41288    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
41289    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
41290    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
41291    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
41292    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
41293
41294}
41295
41296#[cfg(test)]
41297mod tests {
41298
41299    use stdarch_test::simd_test;
41300
41301    use crate::core_arch::x86::*;
41302    use crate::hint::black_box;
41303    use crate::mem;
41304
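    // The masked tests first use an all-zero mask (elements fall back to `src` or are zeroed), then a nonzero mask.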
41305    #[simd_test(enable = "avx512f")]
41306    unsafe fn test_mm512_abs_epi32() {
41307        #[rustfmt::skip]
41308        let a = _mm512_setr_epi32(
41309            0, 1, -1, i32::MAX,
41310            i32::MIN, 100, -100, -32,
41311            0, 1, -1, i32::MAX,
41312            i32::MIN, 100, -100, -32,
41313        );
41314        let r = _mm512_abs_epi32(a);
41315        #[rustfmt::skip]
41316        let e = _mm512_setr_epi32(
41317            0, 1, 1, i32::MAX,
41318            i32::MAX.wrapping_add(1), 100, 100, 32,
41319            0, 1, 1, i32::MAX,
41320            i32::MAX.wrapping_add(1), 100, 100, 32,
41321        );
41322        assert_eq_m512i(r, e);
41323    }
41324
41325    #[simd_test(enable = "avx512f")]
41326    unsafe fn test_mm512_mask_abs_epi32() {
41327        #[rustfmt::skip]
41328        let a = _mm512_setr_epi32(
41329            0, 1, -1, i32::MAX,
41330            i32::MIN, 100, -100, -32,
41331            0, 1, -1, i32::MAX,
41332            i32::MIN, 100, -100, -32,
41333        );
41334        let r = _mm512_mask_abs_epi32(a, 0, a);
41335        assert_eq_m512i(r, a);
41336        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
41337        #[rustfmt::skip]
41338        let e = _mm512_setr_epi32(
41339            0, 1, 1, i32::MAX,
41340            i32::MAX.wrapping_add(1), 100, 100, 32,
41341            0, 1, -1, i32::MAX,
41342            i32::MIN, 100, -100, -32,
41343        );
41344        assert_eq_m512i(r, e);
41345    }
41346
41347    #[simd_test(enable = "avx512f")]
41348    unsafe fn test_mm512_maskz_abs_epi32() {
41349        #[rustfmt::skip]
41350        let a = _mm512_setr_epi32(
41351            0, 1, -1, i32::MAX,
41352            i32::MIN, 100, -100, -32,
41353            0, 1, -1, i32::MAX,
41354            i32::MIN, 100, -100, -32,
41355        );
41356        let r = _mm512_maskz_abs_epi32(0, a);
41357        assert_eq_m512i(r, _mm512_setzero_si512());
41358        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
41359        #[rustfmt::skip]
41360        let e = _mm512_setr_epi32(
41361            0, 1, 1, i32::MAX,
41362            i32::MAX.wrapping_add(1), 100, 100, 32,
41363            0, 0, 0, 0,
41364            0, 0, 0, 0,
41365        );
41366        assert_eq_m512i(r, e);
41367    }
41368
41369    #[simd_test(enable = "avx512f,avx512vl")]
41370    unsafe fn test_mm256_mask_abs_epi32() {
41371        #[rustfmt::skip]
41372        let a = _mm256_setr_epi32(
41373            0, 1, -1, i32::MAX,
41374            i32::MIN, 100, -100, -32,
41375        );
41376        let r = _mm256_mask_abs_epi32(a, 0, a);
41377        assert_eq_m256i(r, a);
41378        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
41379        #[rustfmt::skip]
41380        let e = _mm256_setr_epi32(
41381            0, 1, 1, i32::MAX,
41382            i32::MAX.wrapping_add(1), 100, -100, -32,
41383        );
41384        assert_eq_m256i(r, e);
41385    }
41386
41387    #[simd_test(enable = "avx512f,avx512vl")]
41388    unsafe fn test_mm256_maskz_abs_epi32() {
41389        #[rustfmt::skip]
41390        let a = _mm256_setr_epi32(
41391            0, 1, -1, i32::MAX,
41392            i32::MIN, 100, -100, -32,
41393        );
41394        let r = _mm256_maskz_abs_epi32(0, a);
41395        assert_eq_m256i(r, _mm256_setzero_si256());
41396        let r = _mm256_maskz_abs_epi32(0b00001111, a);
41397        #[rustfmt::skip]
41398        let e = _mm256_setr_epi32(
41399            0, 1, 1, i32::MAX,
41400            0, 0, 0, 0,
41401        );
41402        assert_eq_m256i(r, e);
41403    }
41404
41405    #[simd_test(enable = "avx512f,avx512vl")]
41406    unsafe fn test_mm_mask_abs_epi32() {
41407        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
41408        let r = _mm_mask_abs_epi32(a, 0, a);
41409        assert_eq_m128i(r, a);
41410        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
41411        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
41412        assert_eq_m128i(r, e);
41413    }
41414
41415    #[simd_test(enable = "avx512f,avx512vl")]
41416    unsafe fn test_mm_maskz_abs_epi32() {
41417        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
41418        let r = _mm_maskz_abs_epi32(0, a);
41419        assert_eq_m128i(r, _mm_setzero_si128());
41420        let r = _mm_maskz_abs_epi32(0b00001111, a);
41421        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
41422        assert_eq_m128i(r, e);
41423    }
41424
41425    #[simd_test(enable = "avx512f")]
41426    unsafe fn test_mm512_abs_ps() {
41427        #[rustfmt::skip]
41428        let a = _mm512_setr_ps(
41429            0., 1., -1., f32::MAX,
41430            f32::MIN, 100., -100., -32.,
41431            0., 1., -1., f32::MAX,
41432            f32::MIN, 100., -100., -32.,
41433        );
41434        let r = _mm512_abs_ps(a);
41435        #[rustfmt::skip]
41436        let e = _mm512_setr_ps(
41437            0., 1., 1., f32::MAX,
41438            f32::MAX, 100., 100., 32.,
41439            0., 1., 1., f32::MAX,
41440            f32::MAX, 100., 100., 32.,
41441        );
41442        assert_eq_m512(r, e);
41443    }
41444
41445    #[simd_test(enable = "avx512f")]
41446    unsafe fn test_mm512_mask_abs_ps() {
41447        #[rustfmt::skip]
41448        let a = _mm512_setr_ps(
41449            0., 1., -1., f32::MAX,
41450            f32::MIN, 100., -100., -32.,
41451            0., 1., -1., f32::MAX,
41452            f32::MIN, 100., -100., -32.,
41453        );
41454        let r = _mm512_mask_abs_ps(a, 0, a);
41455        assert_eq_m512(r, a);
41456        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
41457        #[rustfmt::skip]
41458        let e = _mm512_setr_ps(
41459            0., 1., 1., f32::MAX,
41460            f32::MAX, 100., 100., 32.,
41461            0., 1., -1., f32::MAX,
41462            f32::MIN, 100., -100., -32.,
41463        );
41464        assert_eq_m512(r, e);
41465    }
41466
41467    #[simd_test(enable = "avx512f")]
41468    unsafe fn test_mm512_mask_mov_epi32() {
41469        let src = _mm512_set1_epi32(1);
41470        let a = _mm512_set1_epi32(2);
41471        let r = _mm512_mask_mov_epi32(src, 0, a);
41472        assert_eq_m512i(r, src);
41473        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
41474        assert_eq_m512i(r, a);
41475    }
41476
41477    #[simd_test(enable = "avx512f")]
41478    unsafe fn test_mm512_maskz_mov_epi32() {
41479        let a = _mm512_set1_epi32(2);
41480        let r = _mm512_maskz_mov_epi32(0, a);
41481        assert_eq_m512i(r, _mm512_setzero_si512());
41482        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
41483        assert_eq_m512i(r, a);
41484    }
41485
41486    #[simd_test(enable = "avx512f,avx512vl")]
41487    unsafe fn test_mm256_mask_mov_epi32() {
41488        let src = _mm256_set1_epi32(1);
41489        let a = _mm256_set1_epi32(2);
41490        let r = _mm256_mask_mov_epi32(src, 0, a);
41491        assert_eq_m256i(r, src);
41492        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
41493        assert_eq_m256i(r, a);
41494    }
41495
41496    #[simd_test(enable = "avx512f,avx512vl")]
41497    unsafe fn test_mm256_maskz_mov_epi32() {
41498        let a = _mm256_set1_epi32(2);
41499        let r = _mm256_maskz_mov_epi32(0, a);
41500        assert_eq_m256i(r, _mm256_setzero_si256());
41501        let r = _mm256_maskz_mov_epi32(0b11111111, a);
41502        assert_eq_m256i(r, a);
41503    }
41504
41505    #[simd_test(enable = "avx512f,avx512vl")]
41506    unsafe fn test_mm_mask_mov_epi32() {
41507        let src = _mm_set1_epi32(1);
41508        let a = _mm_set1_epi32(2);
41509        let r = _mm_mask_mov_epi32(src, 0, a);
41510        assert_eq_m128i(r, src);
41511        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
41512        assert_eq_m128i(r, a);
41513    }
41514
41515    #[simd_test(enable = "avx512f,avx512vl")]
41516    unsafe fn test_mm_maskz_mov_epi32() {
41517        let a = _mm_set1_epi32(2);
41518        let r = _mm_maskz_mov_epi32(0, a);
41519        assert_eq_m128i(r, _mm_setzero_si128());
41520        let r = _mm_maskz_mov_epi32(0b00001111, a);
41521        assert_eq_m128i(r, a);
41522    }
41523
41524    #[simd_test(enable = "avx512f")]
41525    unsafe fn test_mm512_mask_mov_ps() {
41526        let src = _mm512_set1_ps(1.);
41527        let a = _mm512_set1_ps(2.);
41528        let r = _mm512_mask_mov_ps(src, 0, a);
41529        assert_eq_m512(r, src);
41530        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
41531        assert_eq_m512(r, a);
41532    }
41533
41534    #[simd_test(enable = "avx512f")]
41535    unsafe fn test_mm512_maskz_mov_ps() {
41536        let a = _mm512_set1_ps(2.);
41537        let r = _mm512_maskz_mov_ps(0, a);
41538        assert_eq_m512(r, _mm512_setzero_ps());
41539        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
41540        assert_eq_m512(r, a);
41541    }
41542
41543    #[simd_test(enable = "avx512f,avx512vl")]
41544    unsafe fn test_mm256_mask_mov_ps() {
41545        let src = _mm256_set1_ps(1.);
41546        let a = _mm256_set1_ps(2.);
41547        let r = _mm256_mask_mov_ps(src, 0, a);
41548        assert_eq_m256(r, src);
41549        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
41550        assert_eq_m256(r, a);
41551    }
41552
41553    #[simd_test(enable = "avx512f,avx512vl")]
41554    unsafe fn test_mm256_maskz_mov_ps() {
41555        let a = _mm256_set1_ps(2.);
41556        let r = _mm256_maskz_mov_ps(0, a);
41557        assert_eq_m256(r, _mm256_setzero_ps());
41558        let r = _mm256_maskz_mov_ps(0b11111111, a);
41559        assert_eq_m256(r, a);
41560    }
41561
41562    #[simd_test(enable = "avx512f,avx512vl")]
41563    unsafe fn test_mm_mask_mov_ps() {
41564        let src = _mm_set1_ps(1.);
41565        let a = _mm_set1_ps(2.);
41566        let r = _mm_mask_mov_ps(src, 0, a);
41567        assert_eq_m128(r, src);
41568        let r = _mm_mask_mov_ps(src, 0b00001111, a);
41569        assert_eq_m128(r, a);
41570    }
41571
41572    #[simd_test(enable = "avx512f,avx512vl")]
41573    unsafe fn test_mm_maskz_mov_ps() {
41574        let a = _mm_set1_ps(2.);
41575        let r = _mm_maskz_mov_ps(0, a);
41576        assert_eq_m128(r, _mm_setzero_ps());
41577        let r = _mm_maskz_mov_ps(0b00001111, a);
41578        assert_eq_m128(r, a);
41579    }
41580
41581    #[simd_test(enable = "avx512f")]
41582    unsafe fn test_mm512_add_epi32() {
41583        #[rustfmt::skip]
41584        let a = _mm512_setr_epi32(
41585            0, 1, -1, i32::MAX,
41586            i32::MIN, 100, -100, -32,
41587            0, 1, -1, i32::MAX,
41588            i32::MIN, 100, -100, -32,
41589        );
41590        let b = _mm512_set1_epi32(1);
41591        let r = _mm512_add_epi32(a, b);
41592        #[rustfmt::skip]
41593        let e = _mm512_setr_epi32(
41594            1, 2, 0, i32::MIN,
41595            i32::MIN + 1, 101, -99, -31,
41596            1, 2, 0, i32::MIN,
41597            i32::MIN + 1, 101, -99, -31,
41598        );
41599        assert_eq_m512i(r, e);
41600    }
41601
41602    #[simd_test(enable = "avx512f")]
41603    unsafe fn test_mm512_mask_add_epi32() {
41604        #[rustfmt::skip]
41605        let a = _mm512_setr_epi32(
41606            0, 1, -1, i32::MAX,
41607            i32::MIN, 100, -100, -32,
41608            0, 1, -1, i32::MAX,
41609            i32::MIN, 100, -100, -32,
41610        );
41611        let b = _mm512_set1_epi32(1);
41612        let r = _mm512_mask_add_epi32(a, 0, a, b);
41613        assert_eq_m512i(r, a);
41614        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
41615        #[rustfmt::skip]
41616        let e = _mm512_setr_epi32(
41617            1, 2, 0, i32::MIN,
41618            i32::MIN + 1, 101, -99, -31,
41619            0, 1, -1, i32::MAX,
41620            i32::MIN, 100, -100, -32,
41621        );
41622        assert_eq_m512i(r, e);
41623    }
41624
41625    #[simd_test(enable = "avx512f")]
41626    unsafe fn test_mm512_maskz_add_epi32() {
41627        #[rustfmt::skip]
41628        let a = _mm512_setr_epi32(
41629            0, 1, -1, i32::MAX,
41630            i32::MIN, 100, -100, -32,
41631            0, 1, -1, i32::MAX,
41632            i32::MIN, 100, -100, -32,
41633        );
41634        let b = _mm512_set1_epi32(1);
41635        let r = _mm512_maskz_add_epi32(0, a, b);
41636        assert_eq_m512i(r, _mm512_setzero_si512());
41637        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b); // lanes 8..15 have no mask bit set and are zeroed
41638        #[rustfmt::skip]
41639        let e = _mm512_setr_epi32(
41640            1, 2, 0, i32::MIN,
41641            i32::MIN + 1, 101, -99, -31,
41642            0, 0, 0, 0,
41643            0, 0, 0, 0,
41644        );
41645        assert_eq_m512i(r, e);
41646    }
41647
41648    #[simd_test(enable = "avx512f,avx512vl")]
41649    unsafe fn test_mm256_mask_add_epi32() {
41650        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
41651        let b = _mm256_set1_epi32(1);
41652        let r = _mm256_mask_add_epi32(a, 0, a, b);
41653        assert_eq_m256i(r, a);
41654        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
41655        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
41656        assert_eq_m256i(r, e);
41657    }
41658
41659    #[simd_test(enable = "avx512f,avx512vl")]
41660    unsafe fn test_mm256_maskz_add_epi32() {
41661        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
41662        let b = _mm256_set1_epi32(1);
41663        let r = _mm256_maskz_add_epi32(0, a, b);
41664        assert_eq_m256i(r, _mm256_setzero_si256());
41665        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
41666        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
41667        assert_eq_m256i(r, e);
41668    }
41669
41670    #[simd_test(enable = "avx512f,avx512vl")]
41671    unsafe fn test_mm_mask_add_epi32() {
41672        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
41673        let b = _mm_set1_epi32(1);
41674        let r = _mm_mask_add_epi32(a, 0, a, b);
41675        assert_eq_m128i(r, a);
41676        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
41677        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
41678        assert_eq_m128i(r, e);
41679    }
41680
41681    #[simd_test(enable = "avx512f,avx512vl")]
41682    unsafe fn test_mm_maskz_add_epi32() {
41683        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
41684        let b = _mm_set1_epi32(1);
41685        let r = _mm_maskz_add_epi32(0, a, b);
41686        assert_eq_m128i(r, _mm_setzero_si128());
41687        let r = _mm_maskz_add_epi32(0b00001111, a, b);
41688        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
41689        assert_eq_m128i(r, e);
41690    }
41691
41692    #[simd_test(enable = "avx512f")]
41693    unsafe fn test_mm512_add_ps() {
41694        #[rustfmt::skip]
41695        let a = _mm512_setr_ps(
41696            0., 1., -1., f32::MAX,
41697            f32::MIN, 100., -100., -32.,
41698            0., 1., -1., f32::MAX,
41699            f32::MIN, 100., -100., -32.,
41700        );
41701        let b = _mm512_set1_ps(1.);
41702        let r = _mm512_add_ps(a, b);
41703        #[rustfmt::skip]
41704        let e = _mm512_setr_ps(
41705            1., 2., 0., f32::MAX,
41706            f32::MIN + 1., 101., -99., -31.,
41707            1., 2., 0., f32::MAX,
41708            f32::MIN + 1., 101., -99., -31.,
41709        );
41710        assert_eq_m512(r, e);
41711    }
41712
41713    #[simd_test(enable = "avx512f")]
41714    unsafe fn test_mm512_mask_add_ps() {
41715        #[rustfmt::skip]
41716        let a = _mm512_setr_ps(
41717            0., 1., -1., f32::MAX,
41718            f32::MIN, 100., -100., -32.,
41719            0., 1., -1., f32::MAX,
41720            f32::MIN, 100., -100., -32.,
41721        );
41722        let b = _mm512_set1_ps(1.);
41723        let r = _mm512_mask_add_ps(a, 0, a, b);
41724        assert_eq_m512(r, a);
41725        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
41726        #[rustfmt::skip]
41727        let e = _mm512_setr_ps(
41728            1., 2., 0., f32::MAX,
41729            f32::MIN + 1., 101., -99., -31.,
41730            0., 1., -1., f32::MAX,
41731            f32::MIN, 100., -100., -32.,
41732        );
41733        assert_eq_m512(r, e);
41734    }
41735
41736    #[simd_test(enable = "avx512f")]
41737    unsafe fn test_mm512_maskz_add_ps() {
41738        #[rustfmt::skip]
41739        let a = _mm512_setr_ps(
41740            0., 1., -1., f32::MAX,
41741            f32::MIN, 100., -100., -32.,
41742            0., 1., -1., f32::MAX,
41743            f32::MIN, 100., -100., -32.,
41744        );
41745        let b = _mm512_set1_ps(1.);
41746        let r = _mm512_maskz_add_ps(0, a, b);
41747        assert_eq_m512(r, _mm512_setzero_ps());
41748        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
41749        #[rustfmt::skip]
41750        let e = _mm512_setr_ps(
41751            1., 2., 0., f32::MAX,
41752            f32::MIN + 1., 101., -99., -31.,
41753            0., 0., 0., 0.,
41754            0., 0., 0., 0.,
41755        );
41756        assert_eq_m512(r, e);
41757    }
41758
41759    #[simd_test(enable = "avx512f,avx512vl")]
41760    unsafe fn test_mm256_mask_add_ps() {
41761        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
41762        let b = _mm256_set1_ps(1.);
41763        let r = _mm256_mask_add_ps(a, 0, a, b);
41764        assert_eq_m256(r, a);
41765        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
41766        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
41767        assert_eq_m256(r, e);
41768    }
41769
41770    #[simd_test(enable = "avx512f,avx512vl")]
41771    unsafe fn test_mm256_maskz_add_ps() {
41772        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
41773        let b = _mm256_set1_ps(1.);
41774        let r = _mm256_maskz_add_ps(0, a, b);
41775        assert_eq_m256(r, _mm256_setzero_ps());
41776        let r = _mm256_maskz_add_ps(0b11111111, a, b);
41777        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
41778        assert_eq_m256(r, e);
41779    }
41780
41781    #[simd_test(enable = "avx512f,avx512vl")]
41782    unsafe fn test_mm_mask_add_ps() {
41783        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
41784        let b = _mm_set1_ps(1.);
41785        let r = _mm_mask_add_ps(a, 0, a, b);
41786        assert_eq_m128(r, a);
41787        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
41788        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
41789        assert_eq_m128(r, e);
41790    }
41791
41792    #[simd_test(enable = "avx512f,avx512vl")]
41793    unsafe fn test_mm_maskz_add_ps() {
41794        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
41795        let b = _mm_set1_ps(1.);
41796        let r = _mm_maskz_add_ps(0, a, b);
41797        assert_eq_m128(r, _mm_setzero_ps());
41798        let r = _mm_maskz_add_ps(0b00001111, a, b);
41799        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
41800        assert_eq_m128(r, e);
41801    }
41802
41803    #[simd_test(enable = "avx512f")]
41804    unsafe fn test_mm512_sub_epi32() {
41805        #[rustfmt::skip]
41806        let a = _mm512_setr_epi32(
41807            0, 1, -1, i32::MAX,
41808            i32::MIN, 100, -100, -32,
41809            0, 1, -1, i32::MAX,
41810            i32::MIN, 100, -100, -32,
41811        );
41812        let b = _mm512_set1_epi32(1);
41813        let r = _mm512_sub_epi32(a, b);
41814        #[rustfmt::skip]
41815        let e = _mm512_setr_epi32(
41816            -1, 0, -2, i32::MAX - 1,
41817            i32::MAX, 99, -101, -33,
41818            -1, 0, -2, i32::MAX - 1,
41819            i32::MAX, 99, -101, -33,
41820        );
41821        assert_eq_m512i(r, e);
41822    }
41823
41824    #[simd_test(enable = "avx512f")]
41825    unsafe fn test_mm512_mask_sub_epi32() {
41826        #[rustfmt::skip]
41827        let a = _mm512_setr_epi32(
41828            0, 1, -1, i32::MAX,
41829            i32::MIN, 100, -100, -32,
41830            0, 1, -1, i32::MAX,
41831            i32::MIN, 100, -100, -32,
41832        );
41833        let b = _mm512_set1_epi32(1);
41834        let r = _mm512_mask_sub_epi32(a, 0, a, b);
41835        assert_eq_m512i(r, a);
41836        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
41837        #[rustfmt::skip]
41838        let e = _mm512_setr_epi32(
41839            -1, 0, -2, i32::MAX - 1,
41840            i32::MAX, 99, -101, -33,
41841            0, 1, -1, i32::MAX,
41842            i32::MIN, 100, -100, -32,
41843        );
41844        assert_eq_m512i(r, e);
41845    }
41846
41847    #[simd_test(enable = "avx512f")]
41848    unsafe fn test_mm512_maskz_sub_epi32() {
41849        #[rustfmt::skip]
41850        let a = _mm512_setr_epi32(
41851            0, 1, -1, i32::MAX,
41852            i32::MIN, 100, -100, -32,
41853            0, 1, -1, i32::MAX,
41854            i32::MIN, 100, -100, -32,
41855        );
41856        let b = _mm512_set1_epi32(1);
41857        let r = _mm512_maskz_sub_epi32(0, a, b);
41858        assert_eq_m512i(r, _mm512_setzero_si512());
41859        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
41860        #[rustfmt::skip]
41861        let e = _mm512_setr_epi32(
41862            -1, 0, -2, i32::MAX - 1,
41863            i32::MAX, 99, -101, -33,
41864            0, 0, 0, 0,
41865            0, 0, 0, 0,
41866        );
41867        assert_eq_m512i(r, e);
41868    }
41869
41870    #[simd_test(enable = "avx512f,avx512vl")]
41871    unsafe fn test_mm256_mask_sub_epi32() {
41872        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
41873        let b = _mm256_set1_epi32(1);
41874        let r = _mm256_mask_sub_epi32(a, 0, a, b);
41875        assert_eq_m256i(r, a);
41876        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
41877        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
41878        assert_eq_m256i(r, e);
41879    }
41880
41881    #[simd_test(enable = "avx512f,avx512vl")]
41882    unsafe fn test_mm256_maskz_sub_epi32() {
41883        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
41884        let b = _mm256_set1_epi32(1);
41885        let r = _mm256_maskz_sub_epi32(0, a, b);
41886        assert_eq_m256i(r, _mm256_setzero_si256());
41887        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
41888        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
41889        assert_eq_m256i(r, e);
41890    }
41891
41892    #[simd_test(enable = "avx512f,avx512vl")]
41893    unsafe fn test_mm_mask_sub_epi32() {
41894        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
41895        let b = _mm_set1_epi32(1);
41896        let r = _mm_mask_sub_epi32(a, 0, a, b);
41897        assert_eq_m128i(r, a);
41898        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
41899        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
41900        assert_eq_m128i(r, e);
41901    }
41902
41903    #[simd_test(enable = "avx512f,avx512vl")]
41904    unsafe fn test_mm_maskz_sub_epi32() {
41905        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
41906        let b = _mm_set1_epi32(1);
41907        let r = _mm_maskz_sub_epi32(0, a, b);
41908        assert_eq_m128i(r, _mm_setzero_si128());
41909        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
41910        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
41911        assert_eq_m128i(r, e);
41912    }
41913
41914    #[simd_test(enable = "avx512f")]
41915    unsafe fn test_mm512_sub_ps() {
41916        #[rustfmt::skip]
41917        let a = _mm512_setr_ps(
41918            0., 1., -1., f32::MAX,
41919            f32::MIN, 100., -100., -32.,
41920            0., 1., -1., f32::MAX,
41921            f32::MIN, 100., -100., -32.,
41922        );
41923        let b = _mm512_set1_ps(1.);
41924        let r = _mm512_sub_ps(a, b);
41925        #[rustfmt::skip]
41926        let e = _mm512_setr_ps(
41927            -1., 0., -2., f32::MAX - 1.,
41928            f32::MIN, 99., -101., -33.,
41929            -1., 0., -2., f32::MAX - 1.,
41930            f32::MIN, 99., -101., -33.,
41931        );
41932        assert_eq_m512(r, e);
41933    }
41934
41935    #[simd_test(enable = "avx512f")]
41936    unsafe fn test_mm512_mask_sub_ps() {
41937        #[rustfmt::skip]
41938        let a = _mm512_setr_ps(
41939            0., 1., -1., f32::MAX,
41940            f32::MIN, 100., -100., -32.,
41941            0., 1., -1., f32::MAX,
41942            f32::MIN, 100., -100., -32.,
41943        );
41944        let b = _mm512_set1_ps(1.);
41945        let r = _mm512_mask_sub_ps(a, 0, a, b);
41946        assert_eq_m512(r, a);
41947        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
41948        #[rustfmt::skip]
41949        let e = _mm512_setr_ps(
41950            -1., 0., -2., f32::MAX - 1.,
41951            f32::MIN, 99., -101., -33.,
41952            0., 1., -1., f32::MAX,
41953            f32::MIN, 100., -100., -32.,
41954        );
41955        assert_eq_m512(r, e);
41956    }
41957
41958    #[simd_test(enable = "avx512f")]
41959    unsafe fn test_mm512_maskz_sub_ps() {
41960        #[rustfmt::skip]
41961        let a = _mm512_setr_ps(
41962            0., 1., -1., f32::MAX,
41963            f32::MIN, 100., -100., -32.,
41964            0., 1., -1., f32::MAX,
41965            f32::MIN, 100., -100., -32.,
41966        );
41967        let b = _mm512_set1_ps(1.);
41968        let r = _mm512_maskz_sub_ps(0, a, b);
41969        assert_eq_m512(r, _mm512_setzero_ps());
41970        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
41971        #[rustfmt::skip]
41972        let e = _mm512_setr_ps(
41973            -1., 0., -2., f32::MAX - 1.,
41974            f32::MIN, 99., -101., -33.,
41975            0., 0., 0., 0.,
41976            0., 0., 0., 0.,
41977        );
41978        assert_eq_m512(r, e);
41979    }
41980
41981    #[simd_test(enable = "avx512f,avx512vl")]
41982    unsafe fn test_mm256_mask_sub_ps() {
41983        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
41984        let b = _mm256_set1_ps(1.);
41985        let r = _mm256_mask_sub_ps(a, 0, a, b);
41986        assert_eq_m256(r, a);
41987        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
41988        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
41989        assert_eq_m256(r, e);
41990    }
41991
41992    #[simd_test(enable = "avx512f,avx512vl")]
41993    unsafe fn test_mm256_maskz_sub_ps() {
41994        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
41995        let b = _mm256_set1_ps(1.);
41996        let r = _mm256_maskz_sub_ps(0, a, b);
41997        assert_eq_m256(r, _mm256_setzero_ps());
41998        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
41999        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
42000        assert_eq_m256(r, e);
42001    }
42002
42003    #[simd_test(enable = "avx512f,avx512vl")]
42004    unsafe fn test_mm_mask_sub_ps() {
42005        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42006        let b = _mm_set1_ps(1.);
42007        let r = _mm_mask_sub_ps(a, 0, a, b);
42008        assert_eq_m128(r, a);
42009        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
42010        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
42011        assert_eq_m128(r, e);
42012    }
42013
42014    #[simd_test(enable = "avx512f,avx512vl")]
42015    unsafe fn test_mm_maskz_sub_ps() {
42016        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42017        let b = _mm_set1_ps(1.);
42018        let r = _mm_maskz_sub_ps(0, a, b);
42019        assert_eq_m128(r, _mm_setzero_ps());
42020        let r = _mm_maskz_sub_ps(0b00001111, a, b);
42021        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
42022        assert_eq_m128(r, e);
42023    }
42024
42025    #[simd_test(enable = "avx512f")]
42026    unsafe fn test_mm512_mullo_epi32() {
42027        #[rustfmt::skip]
42028        let a = _mm512_setr_epi32(
42029            0, 1, -1, i32::MAX,
42030            i32::MIN, 100, -100, -32,
42031            0, 1, -1, i32::MAX,
42032            i32::MIN, 100, -100, -32,
42033        );
42034        let b = _mm512_set1_epi32(2);
42035        let r = _mm512_mullo_epi32(a, b);
42036        let e = _mm512_setr_epi32(
42037            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
42038        );
42039        assert_eq_m512i(r, e);
42040    }
42041
42042    #[simd_test(enable = "avx512f")]
42043    unsafe fn test_mm512_mask_mullo_epi32() {
42044        #[rustfmt::skip]
42045        let a = _mm512_setr_epi32(
42046            0, 1, -1, i32::MAX,
42047            i32::MIN, 100, -100, -32,
42048            0, 1, -1, i32::MAX,
42049            i32::MIN, 100, -100, -32,
42050        );
42051        let b = _mm512_set1_epi32(2);
42052        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
42053        assert_eq_m512i(r, a);
42054        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
42055        #[rustfmt::skip]
42056        let e = _mm512_setr_epi32(
42057            0, 2, -2, -2,
42058            0, 200, -200, -64,
42059            0, 1, -1, i32::MAX,
42060            i32::MIN, 100, -100, -32,
42061        );
42062        assert_eq_m512i(r, e);
42063    }
42064
42065    #[simd_test(enable = "avx512f")]
42066    unsafe fn test_mm512_maskz_mullo_epi32() {
42067        #[rustfmt::skip]
42068        let a = _mm512_setr_epi32(
42069            0, 1, -1, i32::MAX,
42070            i32::MIN, 100, -100, -32,
42071            0, 1, -1, i32::MAX,
42072            i32::MIN, 100, -100, -32,
42073        );
42074        let b = _mm512_set1_epi32(2);
42075        let r = _mm512_maskz_mullo_epi32(0, a, b);
42076        assert_eq_m512i(r, _mm512_setzero_si512());
42077        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
42078        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
42079        assert_eq_m512i(r, e);
42080    }
42081
42082    #[simd_test(enable = "avx512f,avx512vl")]
42083    unsafe fn test_mm256_mask_mullo_epi32() {
42084        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
42085        let b = _mm256_set1_epi32(2);
42086        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
42087        assert_eq_m256i(r, a);
42088        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
42089        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
42090        assert_eq_m256i(r, e);
42091    }
42092
42093    #[simd_test(enable = "avx512f,avx512vl")]
42094    unsafe fn test_mm256_maskz_mullo_epi32() {
42095        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
42096        let b = _mm256_set1_epi32(2);
42097        let r = _mm256_maskz_mullo_epi32(0, a, b);
42098        assert_eq_m256i(r, _mm256_setzero_si256());
42099        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
42100        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
42101        assert_eq_m256i(r, e);
42102    }
42103
42104    #[simd_test(enable = "avx512f,avx512vl")]
42105    unsafe fn test_mm_mask_mullo_epi32() {
42106        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
42107        let b = _mm_set1_epi32(2);
42108        let r = _mm_mask_mullo_epi32(a, 0, a, b);
42109        assert_eq_m128i(r, a);
42110        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
42111        let e = _mm_set_epi32(2, -2, -2, 0);
42112        assert_eq_m128i(r, e);
42113    }
42114
42115    #[simd_test(enable = "avx512f,avx512vl")]
42116    unsafe fn test_mm_maskz_mullo_epi32() {
42117        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
42118        let b = _mm_set1_epi32(2);
42119        let r = _mm_maskz_mullo_epi32(0, a, b);
42120        assert_eq_m128i(r, _mm_setzero_si128());
42121        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
42122        let e = _mm_set_epi32(2, -2, -2, 0);
42123        assert_eq_m128i(r, e);
42124    }
42125
42126    #[simd_test(enable = "avx512f")]
42127    unsafe fn test_mm512_mul_ps() {
42128        #[rustfmt::skip]
42129        let a = _mm512_setr_ps(
42130            0., 1., -1., f32::MAX,
42131            f32::MIN, 100., -100., -32.,
42132            0., 1., -1., f32::MAX,
42133            f32::MIN, 100., -100., -32.,
42134        );
42135        let b = _mm512_set1_ps(2.);
42136        let r = _mm512_mul_ps(a, b);
42137        #[rustfmt::skip]
42138        let e = _mm512_setr_ps(
42139            0., 2., -2., f32::INFINITY,
42140            f32::NEG_INFINITY, 200., -200., -64.,
42141            0., 2., -2., f32::INFINITY,
42142            f32::NEG_INFINITY, 200., -200., -64.,
42144        );
42145        assert_eq_m512(r, e);
42146    }
42147
42148    #[simd_test(enable = "avx512f")]
42149    unsafe fn test_mm512_mask_mul_ps() {
42150        #[rustfmt::skip]
42151        let a = _mm512_setr_ps(
42152            0., 1., -1., f32::MAX,
42153            f32::MIN, 100., -100., -32.,
42154            0., 1., -1., f32::MAX,
42155            f32::MIN, 100., -100., -32.,
42156        );
42157        let b = _mm512_set1_ps(2.);
42158        let r = _mm512_mask_mul_ps(a, 0, a, b);
42159        assert_eq_m512(r, a);
42160        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
42161        #[rustfmt::skip]
42162        let e = _mm512_setr_ps(
42163            0., 2., -2., f32::INFINITY,
42164            f32::NEG_INFINITY, 200., -200., -64.,
42165            0., 1., -1., f32::MAX,
42166            f32::MIN, 100., -100., -32.,
42167        );
42168        assert_eq_m512(r, e);
42169    }
42170
42171    #[simd_test(enable = "avx512f")]
42172    unsafe fn test_mm512_maskz_mul_ps() {
42173        #[rustfmt::skip]
42174        let a = _mm512_setr_ps(
42175            0., 1., -1., f32::MAX,
42176            f32::MIN, 100., -100., -32.,
42177            0., 1., -1., f32::MAX,
42178            f32::MIN, 100., -100., -32.,
42179        );
42180        let b = _mm512_set1_ps(2.);
42181        let r = _mm512_maskz_mul_ps(0, a, b);
42182        assert_eq_m512(r, _mm512_setzero_ps());
42183        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
42184        #[rustfmt::skip]
42185        let e = _mm512_setr_ps(
42186            0., 2., -2., f32::INFINITY,
42187            f32::NEG_INFINITY, 200., -200., -64.,
42188            0., 0., 0., 0.,
42189            0., 0., 0., 0.,
42190        );
42191        assert_eq_m512(r, e);
42192    }
42193
42194    #[simd_test(enable = "avx512f,avx512vl")]
42195    unsafe fn test_mm256_mask_mul_ps() {
42196        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
42197        let b = _mm256_set1_ps(2.);
42198        let r = _mm256_mask_mul_ps(a, 0, a, b);
42199        assert_eq_m256(r, a);
42200        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
42201        #[rustfmt::skip]
42202        let e = _mm256_set_ps(
42203            0., 2., -2., f32::INFINITY,
42204            f32::NEG_INFINITY, 200., -200., -64.,
42205        );
42206        assert_eq_m256(r, e);
42207    }
42208
42209    #[simd_test(enable = "avx512f,avx512vl")]
42210    unsafe fn test_mm256_maskz_mul_ps() {
42211        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
42212        let b = _mm256_set1_ps(2.);
42213        let r = _mm256_maskz_mul_ps(0, a, b);
42214        assert_eq_m256(r, _mm256_setzero_ps());
42215        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
42216        #[rustfmt::skip]
42217        let e = _mm256_set_ps(
42218            0., 2., -2., f32::INFINITY,
42219            f32::NEG_INFINITY, 200., -200., -64.,
42220        );
42221        assert_eq_m256(r, e);
42222    }
42223
42224    #[simd_test(enable = "avx512f,avx512vl")]
42225    unsafe fn test_mm_mask_mul_ps() {
42226        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42227        let b = _mm_set1_ps(2.);
42228        let r = _mm_mask_mul_ps(a, 0, a, b);
42229        assert_eq_m128(r, a);
42230        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
42231        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
42232        assert_eq_m128(r, e);
42233    }
42234
42235    #[simd_test(enable = "avx512f,avx512vl")]
42236    unsafe fn test_mm_maskz_mul_ps() {
42237        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
42238        let b = _mm_set1_ps(2.);
42239        let r = _mm_maskz_mul_ps(0, a, b);
42240        assert_eq_m128(r, _mm_setzero_ps());
42241        let r = _mm_maskz_mul_ps(0b00001111, a, b);
42242        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
42243        assert_eq_m128(r, e);
42244    }
42245
42246    #[simd_test(enable = "avx512f")]
42247    unsafe fn test_mm512_div_ps() {
42248        let a = _mm512_setr_ps(
42249            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
42250        );
42251        let b = _mm512_setr_ps(
42252            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
42253        );
42254        let r = _mm512_div_ps(a, b);
42255        #[rustfmt::skip]
42256        let e = _mm512_setr_ps(
42257            0., 0.5, -0.5, -1.,
42258            50., f32::INFINITY, -50., -16.,
42259            0., 0.5, -0.5, 500.,
42260            f32::NEG_INFINITY, 50., -50., -16.,
42261        );
42262        assert_eq_m512(r, e); // nonzero / 0. gives +/-INFINITY, not NAN
42263    }
42264
42265    #[simd_test(enable = "avx512f")]
42266    unsafe fn test_mm512_mask_div_ps() {
42267        let a = _mm512_setr_ps(
42268            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
42269        );
42270        let b = _mm512_setr_ps(
42271            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
42272        );
42273        let r = _mm512_mask_div_ps(a, 0, a, b);
42274        assert_eq_m512(r, a);
42275        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
42276        #[rustfmt::skip]
42277        let e = _mm512_setr_ps(
42278            0., 0.5, -0.5, -1.,
42279            50., f32::INFINITY, -50., -16.,
42280            0., 1., -1., 1000.,
42281            -131., 100., -100., -32.,
42282        );
42283        assert_eq_m512(r, e);
42284    }
42285
42286    #[simd_test(enable = "avx512f")]
42287    unsafe fn test_mm512_maskz_div_ps() {
42288        let a = _mm512_setr_ps(
42289            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
42290        );
42291        let b = _mm512_setr_ps(
42292            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
42293        );
42294        let r = _mm512_maskz_div_ps(0, a, b);
42295        assert_eq_m512(r, _mm512_setzero_ps());
42296        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
42297        #[rustfmt::skip]
42298        let e = _mm512_setr_ps(
42299            0., 0.5, -0.5, -1.,
42300            50., f32::INFINITY, -50., -16.,
42301            0., 0., 0., 0.,
42302            0., 0., 0., 0.,
42303        );
42304        assert_eq_m512(r, e);
42305    }
42306
42307    #[simd_test(enable = "avx512f,avx512vl")]
42308    unsafe fn test_mm256_mask_div_ps() {
42309        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
42310        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
42311        let r = _mm256_mask_div_ps(a, 0, a, b);
42312        assert_eq_m256(r, a);
42313        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
42314        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
42315        assert_eq_m256(r, e);
42316    }
42317
42318    #[simd_test(enable = "avx512f,avx512vl")]
42319    unsafe fn test_mm256_maskz_div_ps() {
42320        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
42321        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
42322        let r = _mm256_maskz_div_ps(0, a, b);
42323        assert_eq_m256(r, _mm256_setzero_ps());
42324        let r = _mm256_maskz_div_ps(0b11111111, a, b);
42325        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
42326        assert_eq_m256(r, e);
42327    }
42328
42329    #[simd_test(enable = "avx512f,avx512vl")]
42330    unsafe fn test_mm_mask_div_ps() {
42331        let a = _mm_set_ps(100., 100., -100., -32.);
42332        let b = _mm_set_ps(2., 0., 2., 2.);
42333        let r = _mm_mask_div_ps(a, 0, a, b);
42334        assert_eq_m128(r, a);
42335        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
42336        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
42337        assert_eq_m128(r, e);
42338    }
42339
42340    #[simd_test(enable = "avx512f,avx512vl")]
42341    unsafe fn test_mm_maskz_div_ps() {
42342        let a = _mm_set_ps(100., 100., -100., -32.);
42343        let b = _mm_set_ps(2., 0., 2., 2.);
42344        let r = _mm_maskz_div_ps(0, a, b);
42345        assert_eq_m128(r, _mm_setzero_ps());
42346        let r = _mm_maskz_div_ps(0b00001111, a, b);
42347        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
42348        assert_eq_m128(r, e);
42349    }
42350
42351    #[simd_test(enable = "avx512f")]
42352    unsafe fn test_mm512_max_epi32() {
42353        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42354        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42355        let r = _mm512_max_epi32(a, b);
42356        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42357        assert_eq_m512i(r, e);
42358    }
42359
42360    #[simd_test(enable = "avx512f")]
42361    unsafe fn test_mm512_mask_max_epi32() {
42362        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42363        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42364        let r = _mm512_mask_max_epi32(a, 0, a, b);
42365        assert_eq_m512i(r, a);
42366        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
42367        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42368        assert_eq_m512i(r, e);
42369    }
42370
42371    #[simd_test(enable = "avx512f")]
42372    unsafe fn test_mm512_maskz_max_epi32() {
42373        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42374        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42375        let r = _mm512_maskz_max_epi32(0, a, b);
42376        assert_eq_m512i(r, _mm512_setzero_si512());
42377        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
42378        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
42379        assert_eq_m512i(r, e);
42380    }
42381
42382    #[simd_test(enable = "avx512f,avx512vl")]
42383    unsafe fn test_mm256_mask_max_epi32() {
42384        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42385        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42386        let r = _mm256_mask_max_epi32(a, 0, a, b);
42387        assert_eq_m256i(r, a);
42388        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
42389        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42390        assert_eq_m256i(r, e);
42391    }
42392
42393    #[simd_test(enable = "avx512f,avx512vl")]
42394    unsafe fn test_mm256_maskz_max_epi32() {
42395        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42396        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42397        let r = _mm256_maskz_max_epi32(0, a, b);
42398        assert_eq_m256i(r, _mm256_setzero_si256());
42399        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
42400        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42401        assert_eq_m256i(r, e);
42402    }
42403
42404    #[simd_test(enable = "avx512f,avx512vl")]
42405    unsafe fn test_mm_mask_max_epi32() {
42406        let a = _mm_set_epi32(0, 1, 2, 3);
42407        let b = _mm_set_epi32(3, 2, 1, 0);
42408        let r = _mm_mask_max_epi32(a, 0, a, b);
42409        assert_eq_m128i(r, a);
42410        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
42411        let e = _mm_set_epi32(3, 2, 2, 3);
42412        assert_eq_m128i(r, e);
42413    }
42414
42415    #[simd_test(enable = "avx512f,avx512vl")]
42416    unsafe fn test_mm_maskz_max_epi32() {
42417        let a = _mm_set_epi32(0, 1, 2, 3);
42418        let b = _mm_set_epi32(3, 2, 1, 0);
42419        let r = _mm_maskz_max_epi32(0, a, b);
42420        assert_eq_m128i(r, _mm_setzero_si128());
42421        let r = _mm_maskz_max_epi32(0b00001111, a, b);
42422        let e = _mm_set_epi32(3, 2, 2, 3);
42423        assert_eq_m128i(r, e);
42424    }
42425
42426    #[simd_test(enable = "avx512f")]
42427    unsafe fn test_mm512_max_ps() {
42428        let a = _mm512_setr_ps(
42429            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42430        );
42431        let b = _mm512_setr_ps(
42432            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42433        );
42434        let r = _mm512_max_ps(a, b);
42435        let e = _mm512_setr_ps(
42436            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
42437        );
42438        assert_eq_m512(r, e);
42439    }
42440
42441    #[simd_test(enable = "avx512f")]
42442    unsafe fn test_mm512_mask_max_ps() {
42443        let a = _mm512_setr_ps(
42444            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42445        );
42446        let b = _mm512_setr_ps(
42447            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42448        );
42449        let r = _mm512_mask_max_ps(a, 0, a, b);
42450        assert_eq_m512(r, a);
42451        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
42452        let e = _mm512_setr_ps(
42453            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
42454        );
42455        assert_eq_m512(r, e);
42456    }
42457
42458    #[simd_test(enable = "avx512f")]
42459    unsafe fn test_mm512_maskz_max_ps() {
42460        let a = _mm512_setr_ps(
42461            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42462        );
42463        let b = _mm512_setr_ps(
42464            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42465        );
42466        let r = _mm512_maskz_max_ps(0, a, b);
42467        assert_eq_m512(r, _mm512_setzero_ps());
42468        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
42469        let e = _mm512_setr_ps(
42470            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
42471        );
42472        assert_eq_m512(r, e);
42473    }
42474
42475    #[simd_test(enable = "avx512f,avx512vl")]
42476    unsafe fn test_mm256_mask_max_ps() {
42477        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42478        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42479        let r = _mm256_mask_max_ps(a, 0, a, b);
42480        assert_eq_m256(r, a);
42481        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
42482        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
42483        assert_eq_m256(r, e);
42484    }
42485
42486    #[simd_test(enable = "avx512f,avx512vl")]
42487    unsafe fn test_mm256_maskz_max_ps() {
42488        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42489        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42490        let r = _mm256_maskz_max_ps(0, a, b);
42491        assert_eq_m256(r, _mm256_setzero_ps());
42492        let r = _mm256_maskz_max_ps(0b11111111, a, b);
42493        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
42494        assert_eq_m256(r, e);
42495    }
42496
42497    #[simd_test(enable = "avx512f,avx512vl")]
42498    unsafe fn test_mm_mask_max_ps() {
42499        let a = _mm_set_ps(0., 1., 2., 3.);
42500        let b = _mm_set_ps(3., 2., 1., 0.);
42501        let r = _mm_mask_max_ps(a, 0, a, b);
42502        assert_eq_m128(r, a);
42503        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
42504        let e = _mm_set_ps(3., 2., 2., 3.);
42505        assert_eq_m128(r, e);
42506    }
42507
42508    #[simd_test(enable = "avx512f,avx512vl")]
42509    unsafe fn test_mm_maskz_max_ps() {
42510        let a = _mm_set_ps(0., 1., 2., 3.);
42511        let b = _mm_set_ps(3., 2., 1., 0.);
42512        let r = _mm_maskz_max_ps(0, a, b);
42513        assert_eq_m128(r, _mm_setzero_ps());
42514        let r = _mm_maskz_max_ps(0b00001111, a, b);
42515        let e = _mm_set_ps(3., 2., 2., 3.);
42516        assert_eq_m128(r, e);
42517    }
42518
42519    #[simd_test(enable = "avx512f")]
42520    unsafe fn test_mm512_max_epu32() {
42521        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42522        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42523        let r = _mm512_max_epu32(a, b);
42524        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42525        assert_eq_m512i(r, e);
42526    }
42527
42528    #[simd_test(enable = "avx512f")]
42529    unsafe fn test_mm512_mask_max_epu32() {
42530        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42531        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42532        let r = _mm512_mask_max_epu32(a, 0, a, b);
42533        assert_eq_m512i(r, a);
42534        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
42535        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
42536        assert_eq_m512i(r, e);
42537    }
42538
42539    #[simd_test(enable = "avx512f")]
42540    unsafe fn test_mm512_maskz_max_epu32() {
42541        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42542        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42543        let r = _mm512_maskz_max_epu32(0, a, b);
42544        assert_eq_m512i(r, _mm512_setzero_si512());
42545        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
42546        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
42547        assert_eq_m512i(r, e);
42548    }
42549
42550    #[simd_test(enable = "avx512f,avx512vl")]
42551    unsafe fn test_mm256_mask_max_epu32() {
42552        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42553        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42554        let r = _mm256_mask_max_epu32(a, 0, a, b);
42555        assert_eq_m256i(r, a);
42556        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
42557        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42558        assert_eq_m256i(r, e);
42559    }
42560
42561    #[simd_test(enable = "avx512f,avx512vl")]
42562    unsafe fn test_mm256_maskz_max_epu32() {
42563        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42564        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42565        let r = _mm256_maskz_max_epu32(0, a, b);
42566        assert_eq_m256i(r, _mm256_setzero_si256());
42567        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
42568        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
42569        assert_eq_m256i(r, e);
42570    }
42571
42572    #[simd_test(enable = "avx512f,avx512vl")]
42573    unsafe fn test_mm_mask_max_epu32() {
42574        let a = _mm_set_epi32(0, 1, 2, 3);
42575        let b = _mm_set_epi32(3, 2, 1, 0);
42576        let r = _mm_mask_max_epu32(a, 0, a, b);
42577        assert_eq_m128i(r, a);
42578        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
42579        let e = _mm_set_epi32(3, 2, 2, 3);
42580        assert_eq_m128i(r, e);
42581    }
42582
42583    #[simd_test(enable = "avx512f,avx512vl")]
42584    unsafe fn test_mm_maskz_max_epu32() {
42585        let a = _mm_set_epi32(0, 1, 2, 3);
42586        let b = _mm_set_epi32(3, 2, 1, 0);
42587        let r = _mm_maskz_max_epu32(0, a, b);
42588        assert_eq_m128i(r, _mm_setzero_si128());
42589        let r = _mm_maskz_max_epu32(0b00001111, a, b);
42590        let e = _mm_set_epi32(3, 2, 2, 3);
42591        assert_eq_m128i(r, e);
42592    }
42593
42594    #[simd_test(enable = "avx512f")]
42595    unsafe fn test_mm512_min_epi32() {
42596        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42597        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42598        let r = _mm512_min_epi32(a, b);
42599        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
42600        assert_eq_m512i(r, e);
42601    }
42602
42603    #[simd_test(enable = "avx512f")]
42604    unsafe fn test_mm512_mask_min_epi32() {
42605        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42606        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42607        let r = _mm512_mask_min_epi32(a, 0, a, b);
42608        assert_eq_m512i(r, a);
42609        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
42610        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42611        assert_eq_m512i(r, e);
42612    }
42613
42614    #[simd_test(enable = "avx512f")]
42615    unsafe fn test_mm512_maskz_min_epi32() {
42616        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42617        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42618        let r = _mm512_maskz_min_epi32(0, a, b);
42619        assert_eq_m512i(r, _mm512_setzero_si512());
42620        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
42621        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
42622        assert_eq_m512i(r, e);
42623    }
42624
42625    #[simd_test(enable = "avx512f,avx512vl")]
42626    unsafe fn test_mm256_mask_min_epi32() {
42627        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42628        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42629        let r = _mm256_mask_min_epi32(a, 0, a, b);
42630        assert_eq_m256i(r, a);
42631        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
42632        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
42633        assert_eq_m256i(r, e);
42634    }
42635
42636    #[simd_test(enable = "avx512f,avx512vl")]
42637    unsafe fn test_mm256_maskz_min_epi32() {
42638        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42639        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42640        let r = _mm256_maskz_min_epi32(0, a, b);
42641        assert_eq_m256i(r, _mm256_setzero_si256());
42642        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
42643        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
42644        assert_eq_m256i(r, e);
42645    }
42646
42647    #[simd_test(enable = "avx512f,avx512vl")]
42648    unsafe fn test_mm_mask_min_epi32() {
42649        let a = _mm_set_epi32(0, 1, 2, 3);
42650        let b = _mm_set_epi32(3, 2, 1, 0);
42651        let r = _mm_mask_min_epi32(a, 0, a, b);
42652        assert_eq_m128i(r, a);
42653        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
42654        let e = _mm_set_epi32(0, 1, 1, 0);
42655        assert_eq_m128i(r, e);
42656    }
42657
42658    #[simd_test(enable = "avx512f,avx512vl")]
42659    unsafe fn test_mm_maskz_min_epi32() {
42660        let a = _mm_set_epi32(0, 1, 2, 3);
42661        let b = _mm_set_epi32(3, 2, 1, 0);
42662        let r = _mm_maskz_min_epi32(0, a, b);
42663        assert_eq_m128i(r, _mm_setzero_si128());
42664        let r = _mm_maskz_min_epi32(0b00001111, a, b);
42665        let e = _mm_set_epi32(0, 1, 1, 0);
42666        assert_eq_m128i(r, e);
42667    }
42668
42669    #[simd_test(enable = "avx512f")]
42670    unsafe fn test_mm512_min_ps() {
42671        let a = _mm512_setr_ps(
42672            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42673        );
42674        let b = _mm512_setr_ps(
42675            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42676        );
42677        let r = _mm512_min_ps(a, b);
42678        let e = _mm512_setr_ps(
42679            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
42680        );
42681        assert_eq_m512(r, e);
42682    }
42683
42684    #[simd_test(enable = "avx512f")]
42685    unsafe fn test_mm512_mask_min_ps() {
42686        let a = _mm512_setr_ps(
42687            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42688        );
42689        let b = _mm512_setr_ps(
42690            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42691        );
42692        let r = _mm512_mask_min_ps(a, 0, a, b);
42693        assert_eq_m512(r, a);
42694        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
42695        let e = _mm512_setr_ps(
42696            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42697        );
42698        assert_eq_m512(r, e);
42699    }
42700
42701    #[simd_test(enable = "avx512f")]
42702    unsafe fn test_mm512_maskz_min_ps() {
42703        let a = _mm512_setr_ps(
42704            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42705        );
42706        let b = _mm512_setr_ps(
42707            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
42708        );
42709        let r = _mm512_maskz_min_ps(0, a, b);
42710        assert_eq_m512(r, _mm512_setzero_ps());
42711        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
42712        let e = _mm512_setr_ps(
42713            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
42714        );
42715        assert_eq_m512(r, e);
42716    }
42717
42718    #[simd_test(enable = "avx512f,avx512vl")]
42719    unsafe fn test_mm256_mask_min_ps() {
42720        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42721        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42722        let r = _mm256_mask_min_ps(a, 0, a, b);
42723        assert_eq_m256(r, a);
42724        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
42725        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
42726        assert_eq_m256(r, e);
42727    }
42728
42729    #[simd_test(enable = "avx512f,avx512vl")]
42730    unsafe fn test_mm256_maskz_min_ps() {
42731        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42732        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
42733        let r = _mm256_maskz_min_ps(0, a, b);
42734        assert_eq_m256(r, _mm256_setzero_ps());
42735        let r = _mm256_maskz_min_ps(0b11111111, a, b);
42736        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
42737        assert_eq_m256(r, e);
42738    }
42739
42740    #[simd_test(enable = "avx512f,avx512vl")]
42741    unsafe fn test_mm_mask_min_ps() {
42742        let a = _mm_set_ps(0., 1., 2., 3.);
42743        let b = _mm_set_ps(3., 2., 1., 0.);
42744        let r = _mm_mask_min_ps(a, 0, a, b);
42745        assert_eq_m128(r, a);
42746        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
42747        let e = _mm_set_ps(0., 1., 1., 0.);
42748        assert_eq_m128(r, e);
42749    }
42750
42751    #[simd_test(enable = "avx512f,avx512vl")]
42752    unsafe fn test_mm_maskz_min_ps() {
42753        let a = _mm_set_ps(0., 1., 2., 3.);
42754        let b = _mm_set_ps(3., 2., 1., 0.);
42755        let r = _mm_maskz_min_ps(0, a, b);
42756        assert_eq_m128(r, _mm_setzero_ps());
42757        let r = _mm_maskz_min_ps(0b00001111, a, b);
42758        let e = _mm_set_ps(0., 1., 1., 0.);
42759        assert_eq_m128(r, e);
42760    }
42761
42762    #[simd_test(enable = "avx512f")]
42763    unsafe fn test_mm512_min_epu32() {
42764        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42765        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42766        let r = _mm512_min_epu32(a, b);
42767        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
42768        assert_eq_m512i(r, e);
42769    }
42770
42771    #[simd_test(enable = "avx512f")]
42772    unsafe fn test_mm512_mask_min_epu32() {
42773        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42774        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42775        let r = _mm512_mask_min_epu32(a, 0, a, b);
42776        assert_eq_m512i(r, a);
42777        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
42778        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42779        assert_eq_m512i(r, e);
42780    }
42781
42782    #[simd_test(enable = "avx512f")]
42783    unsafe fn test_mm512_maskz_min_epu32() {
42784        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
42785        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
42786        let r = _mm512_maskz_min_epu32(0, a, b);
42787        assert_eq_m512i(r, _mm512_setzero_si512());
42788        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
42789        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
42790        assert_eq_m512i(r, e);
42791    }
42792
42793    #[simd_test(enable = "avx512f,avx512vl")]
42794    unsafe fn test_mm256_mask_min_epu32() {
42795        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42796        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42797        let r = _mm256_mask_min_epu32(a, 0, a, b);
42798        assert_eq_m256i(r, a);
42799        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
42800        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
42801        assert_eq_m256i(r, e);
42802    }
42803
42804    #[simd_test(enable = "avx512f,avx512vl")]
42805    unsafe fn test_mm256_maskz_min_epu32() {
42806        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
42807        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
42808        let r = _mm256_maskz_min_epu32(0, a, b);
42809        assert_eq_m256i(r, _mm256_setzero_si256());
42810        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
42811        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
42812        assert_eq_m256i(r, e);
42813    }
42814
42815    #[simd_test(enable = "avx512f,avx512vl")]
42816    unsafe fn test_mm_mask_min_epu32() {
42817        let a = _mm_set_epi32(0, 1, 2, 3);
42818        let b = _mm_set_epi32(3, 2, 1, 0);
42819        let r = _mm_mask_min_epu32(a, 0, a, b);
42820        assert_eq_m128i(r, a);
42821        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
42822        let e = _mm_set_epi32(0, 1, 1, 0);
42823        assert_eq_m128i(r, e);
42824    }
42825
42826    #[simd_test(enable = "avx512f,avx512vl")]
42827    unsafe fn test_mm_maskz_min_epu32() {
42828        let a = _mm_set_epi32(0, 1, 2, 3);
42829        let b = _mm_set_epi32(3, 2, 1, 0);
42830        let r = _mm_maskz_min_epu32(0, a, b);
42831        assert_eq_m128i(r, _mm_setzero_si128());
42832        let r = _mm_maskz_min_epu32(0b00001111, a, b);
42833        let e = _mm_set_epi32(0, 1, 1, 0);
42834        assert_eq_m128i(r, e);
42835    }
42836
42837    #[simd_test(enable = "avx512f")]
42838    unsafe fn test_mm512_sqrt_ps() {
42839        let a = _mm512_setr_ps(
42840            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
42841        );
42842        let r = _mm512_sqrt_ps(a);
42843        let e = _mm512_setr_ps(
42844            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42845        );
42846        assert_eq_m512(r, e);
42847    }
42848
42849    #[simd_test(enable = "avx512f")]
42850    unsafe fn test_mm512_mask_sqrt_ps() {
42851        let a = _mm512_setr_ps(
42852            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
42853        );
42854        let r = _mm512_mask_sqrt_ps(a, 0, a);
42855        assert_eq_m512(r, a);
42856        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
42857        let e = _mm512_setr_ps(
42858            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
42859        );
42860        assert_eq_m512(r, e);
42861    }
42862
42863    #[simd_test(enable = "avx512f")]
42864    unsafe fn test_mm512_maskz_sqrt_ps() {
42865        let a = _mm512_setr_ps(
42866            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
42867        );
42868        let r = _mm512_maskz_sqrt_ps(0, a);
42869        assert_eq_m512(r, _mm512_setzero_ps());
42870        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
42871        let e = _mm512_setr_ps(
42872            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
42873        );
42874        assert_eq_m512(r, e);
42875    }
42876
42877    #[simd_test(enable = "avx512f,avx512vl")]
42878    unsafe fn test_mm256_mask_sqrt_ps() {
42879        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
42880        let r = _mm256_mask_sqrt_ps(a, 0, a);
42881        assert_eq_m256(r, a);
42882        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
42883        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42884        assert_eq_m256(r, e);
42885    }
42886
42887    #[simd_test(enable = "avx512f,avx512vl")]
42888    unsafe fn test_mm256_maskz_sqrt_ps() {
42889        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
42890        let r = _mm256_maskz_sqrt_ps(0, a);
42891        assert_eq_m256(r, _mm256_setzero_ps());
42892        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
42893        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42894        assert_eq_m256(r, e);
42895    }
42896
42897    #[simd_test(enable = "avx512f,avx512vl")]
42898    unsafe fn test_mm_mask_sqrt_ps() {
42899        let a = _mm_set_ps(0., 1., 4., 9.);
42900        let r = _mm_mask_sqrt_ps(a, 0, a);
42901        assert_eq_m128(r, a);
42902        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
42903        let e = _mm_set_ps(0., 1., 2., 3.);
42904        assert_eq_m128(r, e);
42905    }
42906
42907    #[simd_test(enable = "avx512f,avx512vl")]
42908    unsafe fn test_mm_maskz_sqrt_ps() {
42909        let a = _mm_set_ps(0., 1., 4., 9.);
42910        let r = _mm_maskz_sqrt_ps(0, a);
42911        assert_eq_m128(r, _mm_setzero_ps());
42912        let r = _mm_maskz_sqrt_ps(0b00001111, a);
42913        let e = _mm_set_ps(0., 1., 2., 3.);
42914        assert_eq_m128(r, e);
42915    }
42916
42917    #[simd_test(enable = "avx512f")]
42918    unsafe fn test_mm512_fmadd_ps() {
42919        let a = _mm512_set1_ps(1.);
42920        let b = _mm512_setr_ps(
42921            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42922        );
42923        let c = _mm512_set1_ps(1.);
42924        let r = _mm512_fmadd_ps(a, b, c);
42925        let e = _mm512_setr_ps(
42926            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
42927        );
42928        assert_eq_m512(r, e);
42929    }
42930
42931    #[simd_test(enable = "avx512f")]
42932    unsafe fn test_mm512_mask_fmadd_ps() {
42933        let a = _mm512_set1_ps(1.);
42934        let b = _mm512_setr_ps(
42935            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42936        );
42937        let c = _mm512_set1_ps(1.);
42938        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
42939        assert_eq_m512(r, a);
42940        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
42941        let e = _mm512_setr_ps(
42942            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
42943        );
42944        assert_eq_m512(r, e);
42945    }
42946
42947    #[simd_test(enable = "avx512f")]
42948    unsafe fn test_mm512_maskz_fmadd_ps() {
42949        let a = _mm512_set1_ps(1.);
42950        let b = _mm512_setr_ps(
42951            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42952        );
42953        let c = _mm512_set1_ps(1.);
42954        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
42955        assert_eq_m512(r, _mm512_setzero_ps());
42956        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
42957        let e = _mm512_setr_ps(
42958            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
42959        );
42960        assert_eq_m512(r, e);
42961    }
42962
42963    #[simd_test(enable = "avx512f")]
42964    unsafe fn test_mm512_mask3_fmadd_ps() {
42965        let a = _mm512_set1_ps(1.);
42966        let b = _mm512_setr_ps(
42967            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
42968        );
42969        let c = _mm512_set1_ps(2.);
42970        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
42971        assert_eq_m512(r, c);
42972        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
42973        let e = _mm512_setr_ps(
42974            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
42975        );
42976        assert_eq_m512(r, e);
42977    }
42978
42979    #[simd_test(enable = "avx512f,avx512vl")]
42980    unsafe fn test_mm256_mask_fmadd_ps() {
42981        let a = _mm256_set1_ps(1.);
42982        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42983        let c = _mm256_set1_ps(1.);
42984        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
42985        assert_eq_m256(r, a);
42986        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
42987        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
42988        assert_eq_m256(r, e);
42989    }
42990
42991    #[simd_test(enable = "avx512f,avx512vl")]
42992    unsafe fn test_mm256_maskz_fmadd_ps() {
42993        let a = _mm256_set1_ps(1.);
42994        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
42995        let c = _mm256_set1_ps(1.);
42996        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
42997        assert_eq_m256(r, _mm256_setzero_ps());
42998        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
42999        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
43000        assert_eq_m256(r, e);
43001    }
43002
43003    #[simd_test(enable = "avx512f,avx512vl")]
43004    unsafe fn test_mm256_mask3_fmadd_ps() {
43005        let a = _mm256_set1_ps(1.);
43006        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43007        let c = _mm256_set1_ps(1.);
43008        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
43009        assert_eq_m256(r, c);
43010        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
43011        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
43012        assert_eq_m256(r, e);
43013    }
43014
43015    #[simd_test(enable = "avx512f,avx512vl")]
43016    unsafe fn test_mm_mask_fmadd_ps() {
43017        let a = _mm_set1_ps(1.);
43018        let b = _mm_set_ps(0., 1., 2., 3.);
43019        let c = _mm_set1_ps(1.);
43020        let r = _mm_mask_fmadd_ps(a, 0, b, c);
43021        assert_eq_m128(r, a);
43022        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
43023        let e = _mm_set_ps(1., 2., 3., 4.);
43024        assert_eq_m128(r, e);
43025    }
43026
43027    #[simd_test(enable = "avx512f,avx512vl")]
43028    unsafe fn test_mm_maskz_fmadd_ps() {
43029        let a = _mm_set1_ps(1.);
43030        let b = _mm_set_ps(0., 1., 2., 3.);
43031        let c = _mm_set1_ps(1.);
43032        let r = _mm_maskz_fmadd_ps(0, a, b, c);
43033        assert_eq_m128(r, _mm_setzero_ps());
43034        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
43035        let e = _mm_set_ps(1., 2., 3., 4.);
43036        assert_eq_m128(r, e);
43037    }
43038
43039    #[simd_test(enable = "avx512f,avx512vl")]
43040    unsafe fn test_mm_mask3_fmadd_ps() {
43041        let a = _mm_set1_ps(1.);
43042        let b = _mm_set_ps(0., 1., 2., 3.);
43043        let c = _mm_set1_ps(1.);
43044        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
43045        assert_eq_m128(r, c);
43046        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
43047        let e = _mm_set_ps(1., 2., 3., 4.);
43048        assert_eq_m128(r, e);
43049    }
43050
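    // `fmsub` computes `a * b - c` per lane.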
43051    #[simd_test(enable = "avx512f")]
43052    unsafe fn test_mm512_fmsub_ps() {
43053        let a = _mm512_setr_ps(
43054            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
43055        );
43056        let b = _mm512_setr_ps(
43057            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43058        );
43059        let c = _mm512_setr_ps(
43060            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
43061        );
43062        let r = _mm512_fmsub_ps(a, b, c);
43063        let e = _mm512_setr_ps(
43064            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
43065        );
43066        assert_eq_m512(r, e);
43067    }
43068
43069    #[simd_test(enable = "avx512f")]
43070    unsafe fn test_mm512_mask_fmsub_ps() {
43071        let a = _mm512_set1_ps(1.);
43072        let b = _mm512_setr_ps(
43073            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43074        );
43075        let c = _mm512_set1_ps(1.);
43076        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
43077        assert_eq_m512(r, a);
43078        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
43079        let e = _mm512_setr_ps(
43080            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
43081        );
43082        assert_eq_m512(r, e);
43083    }
43084
43085    #[simd_test(enable = "avx512f")]
43086    unsafe fn test_mm512_maskz_fmsub_ps() {
43087        let a = _mm512_set1_ps(1.);
43088        let b = _mm512_setr_ps(
43089            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43090        );
43091        let c = _mm512_set1_ps(1.);
43092        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
43093        assert_eq_m512(r, _mm512_setzero_ps());
43094        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
43095        let e = _mm512_setr_ps(
43096            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
43097        );
43098        assert_eq_m512(r, e);
43099    }
43100
43101    #[simd_test(enable = "avx512f")]
43102    unsafe fn test_mm512_mask3_fmsub_ps() {
43103        let a = _mm512_set1_ps(1.);
43104        let b = _mm512_setr_ps(
43105            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43106        );
43107        let c = _mm512_setr_ps(
43108            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43109        );
43110        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
43111        assert_eq_m512(r, c);
43112        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
43113        let e = _mm512_setr_ps(
43114            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
43115        );
43116        assert_eq_m512(r, e);
43117    }
43118
43119    #[simd_test(enable = "avx512f,avx512vl")]
43120    unsafe fn test_mm256_mask_fmsub_ps() {
43121        let a = _mm256_set1_ps(1.);
43122        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43123        let c = _mm256_set1_ps(1.);
43124        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
43125        assert_eq_m256(r, a);
43126        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
43127        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
43128        assert_eq_m256(r, e);
43129    }
43130
43131    #[simd_test(enable = "avx512f,avx512vl")]
43132    unsafe fn test_mm256_maskz_fmsub_ps() {
43133        let a = _mm256_set1_ps(1.);
43134        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43135        let c = _mm256_set1_ps(1.);
43136        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
43137        assert_eq_m256(r, _mm256_setzero_ps());
43138        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
43139        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
43140        assert_eq_m256(r, e);
43141    }
43142
43143    #[simd_test(enable = "avx512f,avx512vl")]
43144    unsafe fn test_mm256_mask3_fmsub_ps() {
43145        let a = _mm256_set1_ps(1.);
43146        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43147        let c = _mm256_set1_ps(1.);
43148        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
43149        assert_eq_m256(r, c);
43150        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
43151        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
43152        assert_eq_m256(r, e);
43153    }
43154
43155    #[simd_test(enable = "avx512f,avx512vl")]
43156    unsafe fn test_mm_mask_fmsub_ps() {
43157        let a = _mm_set1_ps(1.);
43158        let b = _mm_set_ps(0., 1., 2., 3.);
43159        let c = _mm_set1_ps(1.);
43160        let r = _mm_mask_fmsub_ps(a, 0, b, c);
43161        assert_eq_m128(r, a);
43162        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
43163        let e = _mm_set_ps(-1., 0., 1., 2.);
43164        assert_eq_m128(r, e);
43165    }
43166
43167    #[simd_test(enable = "avx512f,avx512vl")]
43168    unsafe fn test_mm_maskz_fmsub_ps() {
43169        let a = _mm_set1_ps(1.);
43170        let b = _mm_set_ps(0., 1., 2., 3.);
43171        let c = _mm_set1_ps(1.);
43172        let r = _mm_maskz_fmsub_ps(0, a, b, c);
43173        assert_eq_m128(r, _mm_setzero_ps());
43174        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
43175        let e = _mm_set_ps(-1., 0., 1., 2.);
43176        assert_eq_m128(r, e);
43177    }
43178
43179    #[simd_test(enable = "avx512f,avx512vl")]
43180    unsafe fn test_mm_mask3_fmsub_ps() {
43181        let a = _mm_set1_ps(1.);
43182        let b = _mm_set_ps(0., 1., 2., 3.);
43183        let c = _mm_set1_ps(1.);
43184        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
43185        assert_eq_m128(r, c);
43186        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
43187        let e = _mm_set_ps(-1., 0., 1., 2.);
43188        assert_eq_m128(r, e);
43189    }
43190
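    // `fmaddsub` alternates per lane: even-indexed lanes compute `a * b - c`,
    // odd-indexed lanes `a * b + c`.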
43191    #[simd_test(enable = "avx512f")]
43192    unsafe fn test_mm512_fmaddsub_ps() {
43193        let a = _mm512_set1_ps(1.);
43194        let b = _mm512_setr_ps(
43195            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43196        );
43197        let c = _mm512_set1_ps(1.);
43198        let r = _mm512_fmaddsub_ps(a, b, c);
43199        let e = _mm512_setr_ps(
43200            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
43201        );
43202        assert_eq_m512(r, e);
43203    }
43204
43205    #[simd_test(enable = "avx512f")]
43206    unsafe fn test_mm512_mask_fmaddsub_ps() {
43207        let a = _mm512_set1_ps(1.);
43208        let b = _mm512_setr_ps(
43209            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43210        );
43211        let c = _mm512_set1_ps(1.);
43212        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
43213        assert_eq_m512(r, a);
43214        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
43215        let e = _mm512_setr_ps(
43216            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
43217        );
43218        assert_eq_m512(r, e);
43219    }
43220
43221    #[simd_test(enable = "avx512f")]
43222    unsafe fn test_mm512_maskz_fmaddsub_ps() {
43223        let a = _mm512_set1_ps(1.);
43224        let b = _mm512_setr_ps(
43225            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43226        );
43227        let c = _mm512_set1_ps(1.);
43228        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
43229        assert_eq_m512(r, _mm512_setzero_ps());
43230        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
43231        let e = _mm512_setr_ps(
43232            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
43233        );
43234        assert_eq_m512(r, e);
43235    }
43236
43237    #[simd_test(enable = "avx512f")]
43238    unsafe fn test_mm512_mask3_fmaddsub_ps() {
43239        let a = _mm512_set1_ps(1.);
43240        let b = _mm512_setr_ps(
43241            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43242        );
43243        let c = _mm512_setr_ps(
43244            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43245        );
43246        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
43247        assert_eq_m512(r, c);
43248        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
43249        let e = _mm512_setr_ps(
43250            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
43251        );
43252        assert_eq_m512(r, e);
43253    }
43254
43255    #[simd_test(enable = "avx512f,avx512vl")]
43256    unsafe fn test_mm256_mask_fmaddsub_ps() {
43257        let a = _mm256_set1_ps(1.);
43258        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43259        let c = _mm256_set1_ps(1.);
43260        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
43261        assert_eq_m256(r, a);
43262        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
43263        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
43264        assert_eq_m256(r, e);
43265    }
43266
43267    #[simd_test(enable = "avx512f,avx512vl")]
43268    unsafe fn test_mm256_maskz_fmaddsub_ps() {
43269        let a = _mm256_set1_ps(1.);
43270        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43271        let c = _mm256_set1_ps(1.);
43272        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
43273        assert_eq_m256(r, _mm256_setzero_ps());
43274        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
43275        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
43276        assert_eq_m256(r, e);
43277    }
43278
43279    #[simd_test(enable = "avx512f,avx512vl")]
43280    unsafe fn test_mm256_mask3_fmaddsub_ps() {
43281        let a = _mm256_set1_ps(1.);
43282        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43283        let c = _mm256_set1_ps(1.);
43284        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
43285        assert_eq_m256(r, c);
43286        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
43287        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
43288        assert_eq_m256(r, e);
43289    }
43290
43291    #[simd_test(enable = "avx512f,avx512vl")]
43292    unsafe fn test_mm_mask_fmaddsub_ps() {
43293        let a = _mm_set1_ps(1.);
43294        let b = _mm_set_ps(0., 1., 2., 3.);
43295        let c = _mm_set1_ps(1.);
43296        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
43297        assert_eq_m128(r, a);
43298        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
43299        let e = _mm_set_ps(1., 0., 3., 2.);
43300        assert_eq_m128(r, e);
43301    }
43302
43303    #[simd_test(enable = "avx512f,avx512vl")]
43304    unsafe fn test_mm_maskz_fmaddsub_ps() {
43305        let a = _mm_set1_ps(1.);
43306        let b = _mm_set_ps(0., 1., 2., 3.);
43307        let c = _mm_set1_ps(1.);
43308        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
43309        assert_eq_m128(r, _mm_setzero_ps());
43310        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
43311        let e = _mm_set_ps(1., 0., 3., 2.);
43312        assert_eq_m128(r, e);
43313    }
43314
43315    #[simd_test(enable = "avx512f,avx512vl")]
43316    unsafe fn test_mm_mask3_fmaddsub_ps() {
43317        let a = _mm_set1_ps(1.);
43318        let b = _mm_set_ps(0., 1., 2., 3.);
43319        let c = _mm_set1_ps(1.);
43320        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
43321        assert_eq_m128(r, c);
43322        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
43323        let e = _mm_set_ps(1., 0., 3., 2.);
43324        assert_eq_m128(r, e);
43325    }
43326
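    // `fmsubadd` is the mirror image: even-indexed lanes compute `a * b + c`,
    // odd-indexed lanes `a * b - c`.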
43327    #[simd_test(enable = "avx512f")]
43328    unsafe fn test_mm512_fmsubadd_ps() {
43329        let a = _mm512_setr_ps(
43330            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
43331        );
43332        let b = _mm512_setr_ps(
43333            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43334        );
43335        let c = _mm512_setr_ps(
43336            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
43337        );
43338        let r = _mm512_fmsubadd_ps(a, b, c);
43339        let e = _mm512_setr_ps(
43340            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
43341        );
43342        assert_eq_m512(r, e);
43343    }
43344
43345    #[simd_test(enable = "avx512f")]
43346    unsafe fn test_mm512_mask_fmsubadd_ps() {
43347        let a = _mm512_set1_ps(1.);
43348        let b = _mm512_setr_ps(
43349            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43350        );
43351        let c = _mm512_set1_ps(1.);
43352        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
43353        assert_eq_m512(r, a);
43354        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
43355        let e = _mm512_setr_ps(
43356            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
43357        );
43358        assert_eq_m512(r, e);
43359    }
43360
43361    #[simd_test(enable = "avx512f")]
43362    unsafe fn test_mm512_maskz_fmsubadd_ps() {
43363        let a = _mm512_set1_ps(1.);
43364        let b = _mm512_setr_ps(
43365            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43366        );
43367        let c = _mm512_set1_ps(1.);
43368        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
43369        assert_eq_m512(r, _mm512_setzero_ps());
43370        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
43371        let e = _mm512_setr_ps(
43372            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
43373        );
43374        assert_eq_m512(r, e);
43375    }
43376
43377    #[simd_test(enable = "avx512f")]
43378    unsafe fn test_mm512_mask3_fmsubadd_ps() {
43379        let a = _mm512_set1_ps(1.);
43380        let b = _mm512_setr_ps(
43381            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43382        );
43383        let c = _mm512_setr_ps(
43384            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43385        );
43386        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
43387        assert_eq_m512(r, c);
43388        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
43389        let e = _mm512_setr_ps(
43390            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
43391        );
43392        assert_eq_m512(r, e);
43393    }
43394
43395    #[simd_test(enable = "avx512f,avx512vl")]
43396    unsafe fn test_mm256_mask_fmsubadd_ps() {
43397        let a = _mm256_set1_ps(1.);
43398        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43399        let c = _mm256_set1_ps(1.);
43400        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
43401        assert_eq_m256(r, a);
43402        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
43403        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
43404        assert_eq_m256(r, e);
43405    }
43406
43407    #[simd_test(enable = "avx512f,avx512vl")]
43408    unsafe fn test_mm256_maskz_fmsubadd_ps() {
43409        let a = _mm256_set1_ps(1.);
43410        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43411        let c = _mm256_set1_ps(1.);
43412        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
43413        assert_eq_m256(r, _mm256_setzero_ps());
43414        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
43415        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
43416        assert_eq_m256(r, e);
43417    }
43418
43419    #[simd_test(enable = "avx512f,avx512vl")]
43420    unsafe fn test_mm256_mask3_fmsubadd_ps() {
43421        let a = _mm256_set1_ps(1.);
43422        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43423        let c = _mm256_set1_ps(1.);
43424        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
43425        assert_eq_m256(r, c);
43426        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
43427        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
43428        assert_eq_m256(r, e);
43429    }
43430
43431    #[simd_test(enable = "avx512f,avx512vl")]
43432    unsafe fn test_mm_mask_fmsubadd_ps() {
43433        let a = _mm_set1_ps(1.);
43434        let b = _mm_set_ps(0., 1., 2., 3.);
43435        let c = _mm_set1_ps(1.);
43436        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
43437        assert_eq_m128(r, a);
43438        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
43439        let e = _mm_set_ps(-1., 2., 1., 4.);
43440        assert_eq_m128(r, e);
43441    }
43442
43443    #[simd_test(enable = "avx512f,avx512vl")]
43444    unsafe fn test_mm_maskz_fmsubadd_ps() {
43445        let a = _mm_set1_ps(1.);
43446        let b = _mm_set_ps(0., 1., 2., 3.);
43447        let c = _mm_set1_ps(1.);
43448        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
43449        assert_eq_m128(r, _mm_setzero_ps());
43450        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
43451        let e = _mm_set_ps(-1., 2., 1., 4.);
43452        assert_eq_m128(r, e);
43453    }
43454
43455    #[simd_test(enable = "avx512f,avx512vl")]
43456    unsafe fn test_mm_mask3_fmsubadd_ps() {
43457        let a = _mm_set1_ps(1.);
43458        let b = _mm_set_ps(0., 1., 2., 3.);
43459        let c = _mm_set1_ps(1.);
43460        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
43461        assert_eq_m128(r, c);
43462        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
43463        let e = _mm_set_ps(-1., 2., 1., 4.);
43464        assert_eq_m128(r, e);
43465    }
43466
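    // `fnmadd` computes `-(a * b) + c` per lane.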
43467    #[simd_test(enable = "avx512f")]
43468    unsafe fn test_mm512_fnmadd_ps() {
43469        let a = _mm512_set1_ps(1.);
43470        let b = _mm512_setr_ps(
43471            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43472        );
43473        let c = _mm512_set1_ps(1.);
43474        let r = _mm512_fnmadd_ps(a, b, c);
43475        let e = _mm512_setr_ps(
43476            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
43477        );
43478        assert_eq_m512(r, e);
43479    }
43480
43481    #[simd_test(enable = "avx512f")]
43482    unsafe fn test_mm512_mask_fnmadd_ps() {
43483        let a = _mm512_set1_ps(1.);
43484        let b = _mm512_setr_ps(
43485            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43486        );
43487        let c = _mm512_set1_ps(1.);
43488        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
43489        assert_eq_m512(r, a);
43490        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
43491        let e = _mm512_setr_ps(
43492            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
43493        );
43494        assert_eq_m512(r, e);
43495    }
43496
43497    #[simd_test(enable = "avx512f")]
43498    unsafe fn test_mm512_maskz_fnmadd_ps() {
43499        let a = _mm512_set1_ps(1.);
43500        let b = _mm512_setr_ps(
43501            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43502        );
43503        let c = _mm512_set1_ps(1.);
43504        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
43505        assert_eq_m512(r, _mm512_setzero_ps());
43506        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
43507        let e = _mm512_setr_ps(
43508            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
43509        );
43510        assert_eq_m512(r, e);
43511    }
43512
43513    #[simd_test(enable = "avx512f")]
43514    unsafe fn test_mm512_mask3_fnmadd_ps() {
43515        let a = _mm512_set1_ps(1.);
43516        let b = _mm512_setr_ps(
43517            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43518        );
43519        let c = _mm512_setr_ps(
43520            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43521        );
43522        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
43523        assert_eq_m512(r, c);
43524        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
43525        let e = _mm512_setr_ps(
43526            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
43527        );
43528        assert_eq_m512(r, e);
43529    }
43530
43531    #[simd_test(enable = "avx512f,avx512vl")]
43532    unsafe fn test_mm256_mask_fnmadd_ps() {
43533        let a = _mm256_set1_ps(1.);
43534        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43535        let c = _mm256_set1_ps(1.);
43536        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
43537        assert_eq_m256(r, a);
43538        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
43539        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
43540        assert_eq_m256(r, e);
43541    }
43542
43543    #[simd_test(enable = "avx512f,avx512vl")]
43544    unsafe fn test_mm256_maskz_fnmadd_ps() {
43545        let a = _mm256_set1_ps(1.);
43546        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43547        let c = _mm256_set1_ps(1.);
43548        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
43549        assert_eq_m256(r, _mm256_setzero_ps());
43550        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
43551        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
43552        assert_eq_m256(r, e);
43553    }
43554
43555    #[simd_test(enable = "avx512f,avx512vl")]
43556    unsafe fn test_mm256_mask3_fnmadd_ps() {
43557        let a = _mm256_set1_ps(1.);
43558        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43559        let c = _mm256_set1_ps(1.);
43560        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
43561        assert_eq_m256(r, c);
43562        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
43563        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
43564        assert_eq_m256(r, e);
43565    }
43566
43567    #[simd_test(enable = "avx512f,avx512vl")]
43568    unsafe fn test_mm_mask_fnmadd_ps() {
43569        let a = _mm_set1_ps(1.);
43570        let b = _mm_set_ps(0., 1., 2., 3.);
43571        let c = _mm_set1_ps(1.);
43572        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
43573        assert_eq_m128(r, a);
43574        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
43575        let e = _mm_set_ps(1., 0., -1., -2.);
43576        assert_eq_m128(r, e);
43577    }
43578
43579    #[simd_test(enable = "avx512f,avx512vl")]
43580    unsafe fn test_mm_maskz_fnmadd_ps() {
43581        let a = _mm_set1_ps(1.);
43582        let b = _mm_set_ps(0., 1., 2., 3.);
43583        let c = _mm_set1_ps(1.);
43584        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
43585        assert_eq_m128(r, _mm_setzero_ps());
43586        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
43587        let e = _mm_set_ps(1., 0., -1., -2.);
43588        assert_eq_m128(r, e);
43589    }
43590
43591    #[simd_test(enable = "avx512f,avx512vl")]
43592    unsafe fn test_mm_mask3_fnmadd_ps() {
43593        let a = _mm_set1_ps(1.);
43594        let b = _mm_set_ps(0., 1., 2., 3.);
43595        let c = _mm_set1_ps(1.);
43596        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
43597        assert_eq_m128(r, c);
43598        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
43599        let e = _mm_set_ps(1., 0., -1., -2.);
43600        assert_eq_m128(r, e);
43601    }
43602
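    // `fnmsub` computes `-(a * b) - c` per lane.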
43603    #[simd_test(enable = "avx512f")]
43604    unsafe fn test_mm512_fnmsub_ps() {
43605        let a = _mm512_set1_ps(1.);
43606        let b = _mm512_setr_ps(
43607            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43608        );
43609        let c = _mm512_set1_ps(1.);
43610        let r = _mm512_fnmsub_ps(a, b, c);
43611        let e = _mm512_setr_ps(
43612            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
43613        );
43614        assert_eq_m512(r, e);
43615    }
43616
43617    #[simd_test(enable = "avx512f")]
43618    unsafe fn test_mm512_mask_fnmsub_ps() {
43619        let a = _mm512_set1_ps(1.);
43620        let b = _mm512_setr_ps(
43621            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43622        );
43623        let c = _mm512_set1_ps(1.);
43624        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
43625        assert_eq_m512(r, a);
43626        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
43627        let e = _mm512_setr_ps(
43628            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
43629        );
43630        assert_eq_m512(r, e);
43631    }
43632
43633    #[simd_test(enable = "avx512f")]
43634    unsafe fn test_mm512_maskz_fnmsub_ps() {
43635        let a = _mm512_set1_ps(1.);
43636        let b = _mm512_setr_ps(
43637            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43638        );
43639        let c = _mm512_set1_ps(1.);
43640        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
43641        assert_eq_m512(r, _mm512_setzero_ps());
43642        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
43643        let e = _mm512_setr_ps(
43644            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
43645        );
43646        assert_eq_m512(r, e);
43647    }
43648
43649    #[simd_test(enable = "avx512f")]
43650    unsafe fn test_mm512_mask3_fnmsub_ps() {
43651        let a = _mm512_set1_ps(1.);
43652        let b = _mm512_setr_ps(
43653            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
43654        );
43655        let c = _mm512_setr_ps(
43656            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
43657        );
43658        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
43659        assert_eq_m512(r, c);
43660        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
43661        let e = _mm512_setr_ps(
43662            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
43663        );
43664        assert_eq_m512(r, e);
43665    }
43666
43667    #[simd_test(enable = "avx512f,avx512vl")]
43668    unsafe fn test_mm256_mask_fnmsub_ps() {
43669        let a = _mm256_set1_ps(1.);
43670        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43671        let c = _mm256_set1_ps(1.);
43672        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
43673        assert_eq_m256(r, a);
43674        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
43675        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
43676        assert_eq_m256(r, e);
43677    }
43678
43679    #[simd_test(enable = "avx512f,avx512vl")]
43680    unsafe fn test_mm256_maskz_fnmsub_ps() {
43681        let a = _mm256_set1_ps(1.);
43682        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43683        let c = _mm256_set1_ps(1.);
43684        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
43685        assert_eq_m256(r, _mm256_setzero_ps());
43686        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
43687        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
43688        assert_eq_m256(r, e);
43689    }
43690
43691    #[simd_test(enable = "avx512f,avx512vl")]
43692    unsafe fn test_mm256_mask3_fnmsub_ps() {
43693        let a = _mm256_set1_ps(1.);
43694        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
43695        let c = _mm256_set1_ps(1.);
43696        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
43697        assert_eq_m256(r, c);
43698        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
43699        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
43700        assert_eq_m256(r, e);
43701    }
43702
43703    #[simd_test(enable = "avx512f,avx512vl")]
43704    unsafe fn test_mm_mask_fnmsub_ps() {
43705        let a = _mm_set1_ps(1.);
43706        let b = _mm_set_ps(0., 1., 2., 3.);
43707        let c = _mm_set1_ps(1.);
43708        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
43709        assert_eq_m128(r, a);
43710        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
43711        let e = _mm_set_ps(-1., -2., -3., -4.);
43712        assert_eq_m128(r, e);
43713    }
43714
43715    #[simd_test(enable = "avx512f,avx512vl")]
43716    unsafe fn test_mm_maskz_fnmsub_ps() {
43717        let a = _mm_set1_ps(1.);
43718        let b = _mm_set_ps(0., 1., 2., 3.);
43719        let c = _mm_set1_ps(1.);
43720        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
43721        assert_eq_m128(r, _mm_setzero_ps());
43722        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
43723        let e = _mm_set_ps(-1., -2., -3., -4.);
43724        assert_eq_m128(r, e);
43725    }
43726
43727    #[simd_test(enable = "avx512f,avx512vl")]
43728    unsafe fn test_mm_mask3_fnmsub_ps() {
43729        let a = _mm_set1_ps(1.);
43730        let b = _mm_set_ps(0., 1., 2., 3.);
43731        let c = _mm_set1_ps(1.);
43732        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
43733        assert_eq_m128(r, c);
43734        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
43735        let e = _mm_set_ps(-1., -2., -3., -4.);
43736        assert_eq_m128(r, e);
43737    }
43738
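    // `rcp14` returns an approximate reciprocal with a relative error of less than 2^-14,
    // hence the expected constant 0.33333206 rather than an exact 1/3.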
43739    #[simd_test(enable = "avx512f")]
43740    unsafe fn test_mm512_rcp14_ps() {
43741        let a = _mm512_set1_ps(3.);
43742        let r = _mm512_rcp14_ps(a);
43743        let e = _mm512_set1_ps(0.33333206);
43744        assert_eq_m512(r, e);
43745    }
43746
43747    #[simd_test(enable = "avx512f")]
43748    unsafe fn test_mm512_mask_rcp14_ps() {
43749        let a = _mm512_set1_ps(3.);
43750        let r = _mm512_mask_rcp14_ps(a, 0, a);
43751        assert_eq_m512(r, a);
43752        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
43753        let e = _mm512_setr_ps(
43754            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
43755            0.33333206, 0.33333206, 0.33333206, 0.33333206,
43756        );
43757        assert_eq_m512(r, e);
43758    }
43759
43760    #[simd_test(enable = "avx512f")]
43761    unsafe fn test_mm512_maskz_rcp14_ps() {
43762        let a = _mm512_set1_ps(3.);
43763        let r = _mm512_maskz_rcp14_ps(0, a);
43764        assert_eq_m512(r, _mm512_setzero_ps());
43765        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
43766        let e = _mm512_setr_ps(
43767            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
43768            0.33333206, 0.33333206, 0.33333206, 0.33333206,
43769        );
43770        assert_eq_m512(r, e);
43771    }
43772
43773    #[simd_test(enable = "avx512f,avx512vl")]
43774    unsafe fn test_mm256_rcp14_ps() {
43775        let a = _mm256_set1_ps(3.);
43776        let r = _mm256_rcp14_ps(a);
43777        let e = _mm256_set1_ps(0.33333206);
43778        assert_eq_m256(r, e);
43779    }
43780
43781    #[simd_test(enable = "avx512f,avx512vl")]
43782    unsafe fn test_mm256_mask_rcp14_ps() {
43783        let a = _mm256_set1_ps(3.);
43784        let r = _mm256_mask_rcp14_ps(a, 0, a);
43785        assert_eq_m256(r, a);
43786        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
43787        let e = _mm256_set1_ps(0.33333206);
43788        assert_eq_m256(r, e);
43789    }
43790
43791    #[simd_test(enable = "avx512f,avx512vl")]
43792    unsafe fn test_mm256_maskz_rcp14_ps() {
43793        let a = _mm256_set1_ps(3.);
43794        let r = _mm256_maskz_rcp14_ps(0, a);
43795        assert_eq_m256(r, _mm256_setzero_ps());
43796        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
43797        let e = _mm256_set1_ps(0.33333206);
43798        assert_eq_m256(r, e);
43799    }
43800
43801    #[simd_test(enable = "avx512f,avx512vl")]
43802    unsafe fn test_mm_rcp14_ps() {
43803        let a = _mm_set1_ps(3.);
43804        let r = _mm_rcp14_ps(a);
43805        let e = _mm_set1_ps(0.33333206);
43806        assert_eq_m128(r, e);
43807    }
43808
43809    #[simd_test(enable = "avx512f,avx512vl")]
43810    unsafe fn test_mm_mask_rcp14_ps() {
43811        let a = _mm_set1_ps(3.);
43812        let r = _mm_mask_rcp14_ps(a, 0, a);
43813        assert_eq_m128(r, a);
43814        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
43815        let e = _mm_set1_ps(0.33333206);
43816        assert_eq_m128(r, e);
43817    }
43818
43819    #[simd_test(enable = "avx512f,avx512vl")]
43820    unsafe fn test_mm_maskz_rcp14_ps() {
43821        let a = _mm_set1_ps(3.);
43822        let r = _mm_maskz_rcp14_ps(0, a);
43823        assert_eq_m128(r, _mm_setzero_ps());
43824        let r = _mm_maskz_rcp14_ps(0b00001111, a);
43825        let e = _mm_set1_ps(0.33333206);
43826        assert_eq_m128(r, e);
43827    }
43828
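    // `rsqrt14` approximates `1 / sqrt(a)` to within a relative error of 2^-14;
    // 0.5773392 is the approximation of 1 / sqrt(3).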
43829    #[simd_test(enable = "avx512f")]
43830    unsafe fn test_mm512_rsqrt14_ps() {
43831        let a = _mm512_set1_ps(3.);
43832        let r = _mm512_rsqrt14_ps(a);
43833        let e = _mm512_set1_ps(0.5773392);
43834        assert_eq_m512(r, e);
43835    }
43836
43837    #[simd_test(enable = "avx512f")]
43838    unsafe fn test_mm512_mask_rsqrt14_ps() {
43839        let a = _mm512_set1_ps(3.);
43840        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
43841        assert_eq_m512(r, a);
43842        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
43843        let e = _mm512_setr_ps(
43844            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
43845            0.5773392, 0.5773392, 0.5773392,
43846        );
43847        assert_eq_m512(r, e);
43848    }
43849
43850    #[simd_test(enable = "avx512f")]
43851    unsafe fn test_mm512_maskz_rsqrt14_ps() {
43852        let a = _mm512_set1_ps(3.);
43853        let r = _mm512_maskz_rsqrt14_ps(0, a);
43854        assert_eq_m512(r, _mm512_setzero_ps());
43855        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
43856        let e = _mm512_setr_ps(
43857            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
43858            0.5773392, 0.5773392, 0.5773392,
43859        );
43860        assert_eq_m512(r, e);
43861    }
43862
43863    #[simd_test(enable = "avx512f,avx512vl")]
43864    unsafe fn test_mm256_rsqrt14_ps() {
43865        let a = _mm256_set1_ps(3.);
43866        let r = _mm256_rsqrt14_ps(a);
43867        let e = _mm256_set1_ps(0.5773392);
43868        assert_eq_m256(r, e);
43869    }
43870
43871    #[simd_test(enable = "avx512f,avx512vl")]
43872    unsafe fn test_mm256_mask_rsqrt14_ps() {
43873        let a = _mm256_set1_ps(3.);
43874        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
43875        assert_eq_m256(r, a);
43876        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
43877        let e = _mm256_set1_ps(0.5773392);
43878        assert_eq_m256(r, e);
43879    }
43880
43881    #[simd_test(enable = "avx512f,avx512vl")]
43882    unsafe fn test_mm256_maskz_rsqrt14_ps() {
43883        let a = _mm256_set1_ps(3.);
43884        let r = _mm256_maskz_rsqrt14_ps(0, a);
43885        assert_eq_m256(r, _mm256_setzero_ps());
43886        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
43887        let e = _mm256_set1_ps(0.5773392);
43888        assert_eq_m256(r, e);
43889    }
43890
43891    #[simd_test(enable = "avx512f,avx512vl")]
43892    unsafe fn test_mm_rsqrt14_ps() {
43893        let a = _mm_set1_ps(3.);
43894        let r = _mm_rsqrt14_ps(a);
43895        let e = _mm_set1_ps(0.5773392);
43896        assert_eq_m128(r, e);
43897    }
43898
43899    #[simd_test(enable = "avx512f,avx512vl")]
43900    unsafe fn test_mm_mask_rsqrt14_ps() {
43901        let a = _mm_set1_ps(3.);
43902        let r = _mm_mask_rsqrt14_ps(a, 0, a);
43903        assert_eq_m128(r, a);
43904        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
43905        let e = _mm_set1_ps(0.5773392);
43906        assert_eq_m128(r, e);
43907    }
43908
43909    #[simd_test(enable = "avx512f,avx512vl")]
43910    unsafe fn test_mm_maskz_rsqrt14_ps() {
43911        let a = _mm_set1_ps(3.);
43912        let r = _mm_maskz_rsqrt14_ps(0, a);
43913        assert_eq_m128(r, _mm_setzero_ps());
43914        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
43915        let e = _mm_set1_ps(0.5773392);
43916        assert_eq_m128(r, e);
43917    }
43918
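    // `getexp` extracts the unbiased exponent, i.e. `floor(log2(|a|))`, as a float;
    // for 3.0 that is 1.0.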
43919    #[simd_test(enable = "avx512f")]
43920    unsafe fn test_mm512_getexp_ps() {
43921        let a = _mm512_set1_ps(3.);
43922        let r = _mm512_getexp_ps(a);
43923        let e = _mm512_set1_ps(1.);
43924        assert_eq_m512(r, e);
43925    }
43926
43927    #[simd_test(enable = "avx512f")]
43928    unsafe fn test_mm512_mask_getexp_ps() {
43929        let a = _mm512_set1_ps(3.);
43930        let r = _mm512_mask_getexp_ps(a, 0, a);
43931        assert_eq_m512(r, a);
43932        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
43933        let e = _mm512_setr_ps(
43934            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
43935        );
43936        assert_eq_m512(r, e);
43937    }
43938
43939    #[simd_test(enable = "avx512f")]
43940    unsafe fn test_mm512_maskz_getexp_ps() {
43941        let a = _mm512_set1_ps(3.);
43942        let r = _mm512_maskz_getexp_ps(0, a);
43943        assert_eq_m512(r, _mm512_setzero_ps());
43944        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
43945        let e = _mm512_setr_ps(
43946            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
43947        );
43948        assert_eq_m512(r, e);
43949    }
43950
43951    #[simd_test(enable = "avx512f,avx512vl")]
43952    unsafe fn test_mm256_getexp_ps() {
43953        let a = _mm256_set1_ps(3.);
43954        let r = _mm256_getexp_ps(a);
43955        let e = _mm256_set1_ps(1.);
43956        assert_eq_m256(r, e);
43957    }
43958
43959    #[simd_test(enable = "avx512f,avx512vl")]
43960    unsafe fn test_mm256_mask_getexp_ps() {
43961        let a = _mm256_set1_ps(3.);
43962        let r = _mm256_mask_getexp_ps(a, 0, a);
43963        assert_eq_m256(r, a);
43964        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
43965        let e = _mm256_set1_ps(1.);
43966        assert_eq_m256(r, e);
43967    }
43968
43969    #[simd_test(enable = "avx512f,avx512vl")]
43970    unsafe fn test_mm256_maskz_getexp_ps() {
43971        let a = _mm256_set1_ps(3.);
43972        let r = _mm256_maskz_getexp_ps(0, a);
43973        assert_eq_m256(r, _mm256_setzero_ps());
43974        let r = _mm256_maskz_getexp_ps(0b11111111, a);
43975        let e = _mm256_set1_ps(1.);
43976        assert_eq_m256(r, e);
43977    }
43978
43979    #[simd_test(enable = "avx512f,avx512vl")]
43980    unsafe fn test_mm_getexp_ps() {
43981        let a = _mm_set1_ps(3.);
43982        let r = _mm_getexp_ps(a);
43983        let e = _mm_set1_ps(1.);
43984        assert_eq_m128(r, e);
43985    }
43986
43987    #[simd_test(enable = "avx512f,avx512vl")]
43988    unsafe fn test_mm_mask_getexp_ps() {
43989        let a = _mm_set1_ps(3.);
43990        let r = _mm_mask_getexp_ps(a, 0, a);
43991        assert_eq_m128(r, a);
43992        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
43993        let e = _mm_set1_ps(1.);
43994        assert_eq_m128(r, e);
43995    }
43996
43997    #[simd_test(enable = "avx512f,avx512vl")]
43998    unsafe fn test_mm_maskz_getexp_ps() {
43999        let a = _mm_set1_ps(3.);
44000        let r = _mm_maskz_getexp_ps(0, a);
44001        assert_eq_m128(r, _mm_setzero_ps());
44002        let r = _mm_maskz_getexp_ps(0b00001111, a);
44003        let e = _mm_set1_ps(1.);
44004        assert_eq_m128(r, e);
44005    }
44006
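    // `roundscale`'s IMM8 packs the scale M in bits 7:4 (round to a multiple of 2^-M)
    // and the rounding mode in the low bits; all zeros rounds to the nearest integer,
    // so 1.1 becomes 1.0.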
44007    #[simd_test(enable = "avx512f")]
44008    unsafe fn test_mm512_roundscale_ps() {
44009        let a = _mm512_set1_ps(1.1);
44010        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
44011        let e = _mm512_set1_ps(1.0);
44012        assert_eq_m512(r, e);
44013    }
44014
44015    #[simd_test(enable = "avx512f")]
44016    unsafe fn test_mm512_mask_roundscale_ps() {
44017        let a = _mm512_set1_ps(1.1);
44018        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
44019        let e = _mm512_set1_ps(1.1);
44020        assert_eq_m512(r, e);
44021        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
44022        let e = _mm512_set1_ps(1.0);
44023        assert_eq_m512(r, e);
44024    }
44025
44026    #[simd_test(enable = "avx512f")]
44027    unsafe fn test_mm512_maskz_roundscale_ps() {
44028        let a = _mm512_set1_ps(1.1);
44029        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
44030        assert_eq_m512(r, _mm512_setzero_ps());
44031        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
44032        let e = _mm512_set1_ps(1.0);
44033        assert_eq_m512(r, e);
44034    }
44035
44036    #[simd_test(enable = "avx512f,avx512vl")]
44037    unsafe fn test_mm256_roundscale_ps() {
44038        let a = _mm256_set1_ps(1.1);
44039        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
44040        let e = _mm256_set1_ps(1.0);
44041        assert_eq_m256(r, e);
44042    }
44043
44044    #[simd_test(enable = "avx512f,avx512vl")]
44045    unsafe fn test_mm256_mask_roundscale_ps() {
44046        let a = _mm256_set1_ps(1.1);
44047        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
44048        let e = _mm256_set1_ps(1.1);
44049        assert_eq_m256(r, e);
44050        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
44051        let e = _mm256_set1_ps(1.0);
44052        assert_eq_m256(r, e);
44053    }
44054
44055    #[simd_test(enable = "avx512f,avx512vl")]
44056    unsafe fn test_mm256_maskz_roundscale_ps() {
44057        let a = _mm256_set1_ps(1.1);
44058        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
44059        assert_eq_m256(r, _mm256_setzero_ps());
44060        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
44061        let e = _mm256_set1_ps(1.0);
44062        assert_eq_m256(r, e);
44063    }
44064
44065    #[simd_test(enable = "avx512f,avx512vl")]
44066    unsafe fn test_mm_roundscale_ps() {
44067        let a = _mm_set1_ps(1.1);
44068        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
44069        let e = _mm_set1_ps(1.0);
44070        assert_eq_m128(r, e);
44071    }
44072
44073    #[simd_test(enable = "avx512f,avx512vl")]
44074    unsafe fn test_mm_mask_roundscale_ps() {
44075        let a = _mm_set1_ps(1.1);
44076        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
44077        let e = _mm_set1_ps(1.1);
44078        assert_eq_m128(r, e);
44079        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
44080        let e = _mm_set1_ps(1.0);
44081        assert_eq_m128(r, e);
44082    }
44083
44084    #[simd_test(enable = "avx512f,avx512vl")]
44085    unsafe fn test_mm_maskz_roundscale_ps() {
44086        let a = _mm_set1_ps(1.1);
44087        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
44088        assert_eq_m128(r, _mm_setzero_ps());
44089        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
44090        let e = _mm_set1_ps(1.0);
44091        assert_eq_m128(r, e);
44092    }
44093
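    // `scalef` computes `a * 2^floor(b)`: here 1.0 * 2^3 = 8.0.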
44094    #[simd_test(enable = "avx512f")]
44095    unsafe fn test_mm512_scalef_ps() {
44096        let a = _mm512_set1_ps(1.);
44097        let b = _mm512_set1_ps(3.);
44098        let r = _mm512_scalef_ps(a, b);
44099        let e = _mm512_set1_ps(8.);
44100        assert_eq_m512(r, e);
44101    }
44102
44103    #[simd_test(enable = "avx512f")]
44104    unsafe fn test_mm512_mask_scalef_ps() {
44105        let a = _mm512_set1_ps(1.);
44106        let b = _mm512_set1_ps(3.);
44107        let r = _mm512_mask_scalef_ps(a, 0, a, b);
44108        assert_eq_m512(r, a);
44109        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
44110        let e = _mm512_set_ps(
44111            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44112        );
44113        assert_eq_m512(r, e);
44114    }
44115
44116    #[simd_test(enable = "avx512f")]
44117    unsafe fn test_mm512_maskz_scalef_ps() {
44118        let a = _mm512_set1_ps(1.);
44119        let b = _mm512_set1_ps(3.);
44120        let r = _mm512_maskz_scalef_ps(0, a, b);
44121        assert_eq_m512(r, _mm512_setzero_ps());
44122        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
44123        let e = _mm512_set_ps(
44124            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44125        );
44126        assert_eq_m512(r, e);
44127    }
44128
44129    #[simd_test(enable = "avx512f,avx512vl")]
44130    unsafe fn test_mm256_scalef_ps() {
44131        let a = _mm256_set1_ps(1.);
44132        let b = _mm256_set1_ps(3.);
44133        let r = _mm256_scalef_ps(a, b);
44134        let e = _mm256_set1_ps(8.);
44135        assert_eq_m256(r, e);
44136    }
44137
44138    #[simd_test(enable = "avx512f,avx512vl")]
44139    unsafe fn test_mm256_mask_scalef_ps() {
44140        let a = _mm256_set1_ps(1.);
44141        let b = _mm256_set1_ps(3.);
44142        let r = _mm256_mask_scalef_ps(a, 0, a, b);
44143        assert_eq_m256(r, a);
44144        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
44145        let e = _mm256_set1_ps(8.);
44146        assert_eq_m256(r, e);
44147    }
44148
44149    #[simd_test(enable = "avx512f,avx512vl")]
44150    unsafe fn test_mm256_maskz_scalef_ps() {
44151        let a = _mm256_set1_ps(1.);
44152        let b = _mm256_set1_ps(3.);
44153        let r = _mm256_maskz_scalef_ps(0, a, b);
44154        assert_eq_m256(r, _mm256_setzero_ps());
44155        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
44156        let e = _mm256_set1_ps(8.);
44157        assert_eq_m256(r, e);
44158    }
44159
44160    #[simd_test(enable = "avx512f,avx512vl")]
44161    unsafe fn test_mm_scalef_ps() {
44162        let a = _mm_set1_ps(1.);
44163        let b = _mm_set1_ps(3.);
44164        let r = _mm_scalef_ps(a, b);
44165        let e = _mm_set1_ps(8.);
44166        assert_eq_m128(r, e);
44167    }
44168
44169    #[simd_test(enable = "avx512f,avx512vl")]
44170    unsafe fn test_mm_mask_scalef_ps() {
44171        let a = _mm_set1_ps(1.);
44172        let b = _mm_set1_ps(3.);
44173        let r = _mm_mask_scalef_ps(a, 0, a, b);
44174        assert_eq_m128(r, a);
44175        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
44176        let e = _mm_set1_ps(8.);
44177        assert_eq_m128(r, e);
44178    }
44179
44180    #[simd_test(enable = "avx512f,avx512vl")]
44181    unsafe fn test_mm_maskz_scalef_ps() {
44182        let a = _mm_set1_ps(1.);
44183        let b = _mm_set1_ps(3.);
44184        let r = _mm_maskz_scalef_ps(0, a, b);
44185        assert_eq_m128(r, _mm_setzero_ps());
44186        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
44187        let e = _mm_set1_ps(8.);
44188        assert_eq_m128(r, e);
44189    }
44190
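    // `fixupimm` rewrites lanes that fall into special value classes (NaN, zero,
    // infinity, ...) according to 4-bit action codes packed into the lanes of `c`;
    // IMM8 mainly controls exception reporting (see Intel's documentation for the
    // exact token/action encoding).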
44191    #[simd_test(enable = "avx512f")]
44192    unsafe fn test_mm512_fixupimm_ps() {
44193        let a = _mm512_set1_ps(f32::NAN);
44194        let b = _mm512_set1_ps(f32::MAX);
44195        let c = _mm512_set1_epi32(i32::MAX);
44197        let r = _mm512_fixupimm_ps::<5>(a, b, c);
44198        let e = _mm512_set1_ps(0.0);
44199        assert_eq_m512(r, e);
44200    }
44201
44202    #[simd_test(enable = "avx512f")]
44203    unsafe fn test_mm512_mask_fixupimm_ps() {
44204        #[rustfmt::skip]
44205        let a = _mm512_set_ps(
44206            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44207            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44208            1., 1., 1., 1.,
44209            1., 1., 1., 1.,
44210        );
44211        let b = _mm512_set1_ps(f32::MAX);
44212        let c = _mm512_set1_epi32(i32::MAX);
44213        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
44214        let e = _mm512_set_ps(
44215            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
44216        );
44217        assert_eq_m512(r, e);
44218    }
44219
44220    #[simd_test(enable = "avx512f")]
44221    unsafe fn test_mm512_maskz_fixupimm_ps() {
44222        #[rustfmt::skip]
44223        let a = _mm512_set_ps(
44224            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44225            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
44226            1., 1., 1., 1.,
44227            1., 1., 1., 1.,
44228        );
44229        let b = _mm512_set1_ps(f32::MAX);
44230        let c = _mm512_set1_epi32(i32::MAX);
44231        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
44232        let e = _mm512_set_ps(
44233            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
44234        );
44235        assert_eq_m512(r, e);
44236    }
44237
44238    #[simd_test(enable = "avx512f,avx512vl")]
44239    unsafe fn test_mm256_fixupimm_ps() {
44240        let a = _mm256_set1_ps(f32::NAN);
44241        let b = _mm256_set1_ps(f32::MAX);
44242        let c = _mm256_set1_epi32(i32::MAX);
44243        let r = _mm256_fixupimm_ps::<5>(a, b, c);
44244        let e = _mm256_set1_ps(0.0);
44245        assert_eq_m256(r, e);
44246    }
44247
44248    #[simd_test(enable = "avx512f,avx512vl")]
44249    unsafe fn test_mm256_mask_fixupimm_ps() {
44250        let a = _mm256_set1_ps(f32::NAN);
44251        let b = _mm256_set1_ps(f32::MAX);
44252        let c = _mm256_set1_epi32(i32::MAX);
44253        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
44254        let e = _mm256_set1_ps(0.0);
44255        assert_eq_m256(r, e);
44256    }
44257
44258    #[simd_test(enable = "avx512f,avx512vl")]
44259    unsafe fn test_mm256_maskz_fixupimm_ps() {
44260        let a = _mm256_set1_ps(f32::NAN);
44261        let b = _mm256_set1_ps(f32::MAX);
44262        let c = _mm256_set1_epi32(i32::MAX);
44263        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
44264        let e = _mm256_set1_ps(0.0);
44265        assert_eq_m256(r, e);
44266    }
44267
44268    #[simd_test(enable = "avx512f,avx512vl")]
44269    unsafe fn test_mm_fixupimm_ps() {
44270        let a = _mm_set1_ps(f32::NAN);
44271        let b = _mm_set1_ps(f32::MAX);
44272        let c = _mm_set1_epi32(i32::MAX);
44273        let r = _mm_fixupimm_ps::<5>(a, b, c);
44274        let e = _mm_set1_ps(0.0);
44275        assert_eq_m128(r, e);
44276    }
44277
44278    #[simd_test(enable = "avx512f,avx512vl")]
44279    unsafe fn test_mm_mask_fixupimm_ps() {
44280        let a = _mm_set1_ps(f32::NAN);
44281        let b = _mm_set1_ps(f32::MAX);
44282        let c = _mm_set1_epi32(i32::MAX);
44283        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
44284        let e = _mm_set1_ps(0.0);
44285        assert_eq_m128(r, e);
44286    }
44287
44288    #[simd_test(enable = "avx512f,avx512vl")]
44289    unsafe fn test_mm_maskz_fixupimm_ps() {
44290        let a = _mm_set1_ps(f32::NAN);
44291        let b = _mm_set1_ps(f32::MAX);
44292        let c = _mm_set1_epi32(i32::MAX);
44293        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
44294        let e = _mm_set1_ps(0.0);
44295        assert_eq_m128(r, e);
44296    }
44297
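    // `ternarylogic` evaluates an arbitrary three-input boolean function bit by bit:
    // the bits of `a`, `b` and `c` form a 3-bit index (`a` most significant) into the
    // IMM8 truth table. IMM8 = 8 (0b0000_1000) is `!a & b & c`, which is 0 for these inputs.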
44298    #[simd_test(enable = "avx512f")]
44299    unsafe fn test_mm512_ternarylogic_epi32() {
44300        let a = _mm512_set1_epi32(1 << 2);
44301        let b = _mm512_set1_epi32(1 << 1);
44302        let c = _mm512_set1_epi32(1 << 0);
44303        let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
44304        let e = _mm512_set1_epi32(0);
44305        assert_eq_m512i(r, e);
44306    }
44307
44308    #[simd_test(enable = "avx512f")]
44309    unsafe fn test_mm512_mask_ternarylogic_epi32() {
44310        let src = _mm512_set1_epi32(1 << 2);
44311        let a = _mm512_set1_epi32(1 << 1);
44312        let b = _mm512_set1_epi32(1 << 0);
44313        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
44314        assert_eq_m512i(r, src);
44315        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
44316        let e = _mm512_set1_epi32(0);
44317        assert_eq_m512i(r, e);
44318    }
44319
44320    #[simd_test(enable = "avx512f")]
44321    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
44322        let a = _mm512_set1_epi32(1 << 2);
44323        let b = _mm512_set1_epi32(1 << 1);
44324        let c = _mm512_set1_epi32(1 << 0);
44325        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
44326        assert_eq_m512i(r, _mm512_setzero_si512());
44327        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
44328        let e = _mm512_set1_epi32(0);
44329        assert_eq_m512i(r, e);
44330    }
44331
44332    #[simd_test(enable = "avx512f,avx512vl")]
44333    unsafe fn test_mm256_ternarylogic_epi32() {
44334        let a = _mm256_set1_epi32(1 << 2);
44335        let b = _mm256_set1_epi32(1 << 1);
44336        let c = _mm256_set1_epi32(1 << 0);
44337        let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
44338        let e = _mm256_set1_epi32(0);
44339        assert_eq_m256i(r, e);
44340    }
44341
44342    #[simd_test(enable = "avx512f,avx512vl")]
44343    unsafe fn test_mm256_mask_ternarylogic_epi32() {
44344        let src = _mm256_set1_epi32(1 << 2);
44345        let a = _mm256_set1_epi32(1 << 1);
44346        let b = _mm256_set1_epi32(1 << 0);
44347        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
44348        assert_eq_m256i(r, src);
44349        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
44350        let e = _mm256_set1_epi32(0);
44351        assert_eq_m256i(r, e);
44352    }
44353
44354    #[simd_test(enable = "avx512f,avx512vl")]
44355    unsafe fn test_mm256_maskz_ternarylogic_epi32() {
44356        let a = _mm256_set1_epi32(1 << 2);
44357        let b = _mm256_set1_epi32(1 << 1);
44358        let c = _mm256_set1_epi32(1 << 0);
44359        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
44360        assert_eq_m256i(r, _mm256_setzero_si256());
44361        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
44362        let e = _mm256_set1_epi32(0);
44363        assert_eq_m256i(r, e);
44364    }
44365
44366    #[simd_test(enable = "avx512f,avx512vl")]
44367    unsafe fn test_mm_ternarylogic_epi32() {
44368        let a = _mm_set1_epi32(1 << 2);
44369        let b = _mm_set1_epi32(1 << 1);
44370        let c = _mm_set1_epi32(1 << 0);
44371        let r = _mm_ternarylogic_epi32::<8>(a, b, c);
44372        let e = _mm_set1_epi32(0);
44373        assert_eq_m128i(r, e);
44374    }
44375
44376    #[simd_test(enable = "avx512f,avx512vl")]
44377    unsafe fn test_mm_mask_ternarylogic_epi32() {
44378        let src = _mm_set1_epi32(1 << 2);
44379        let a = _mm_set1_epi32(1 << 1);
44380        let b = _mm_set1_epi32(1 << 0);
44381        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
44382        assert_eq_m128i(r, src);
44383        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
44384        let e = _mm_set1_epi32(0);
44385        assert_eq_m128i(r, e);
44386    }
44387
44388    #[simd_test(enable = "avx512f,avx512vl")]
44389    unsafe fn test_mm_maskz_ternarylogic_epi32() {
44390        let a = _mm_set1_epi32(1 << 2);
44391        let b = _mm_set1_epi32(1 << 1);
44392        let c = _mm_set1_epi32(1 << 0);
44393        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
44394        assert_eq_m128i(r, _mm_setzero_si128());
44395        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
44396        let e = _mm_set1_epi32(0);
44397        assert_eq_m128i(r, e);
44398    }
44399
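    // Informal note on the getmant tests below: the first const parameter selects
    // the normalization interval for the extracted mantissa and the second the
    // sign treatment. 10.0 == 1.25 * 2^3, so both _MM_MANT_NORM_1_2 ([1, 2)) and
    // _MM_MANT_NORM_P75_1P5 ([0.75, 1.5)) yield 1.25; the sign control does not
    // matter here because all inputs are positive.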
44400    #[simd_test(enable = "avx512f")]
44401    unsafe fn test_mm512_getmant_ps() {
44402        let a = _mm512_set1_ps(10.);
44403        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
44404        let e = _mm512_set1_ps(1.25);
44405        assert_eq_m512(r, e);
44406    }
44407
44408    #[simd_test(enable = "avx512f")]
44409    unsafe fn test_mm512_mask_getmant_ps() {
44410        let a = _mm512_set1_ps(10.);
44411        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
44412        assert_eq_m512(r, a);
44413        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
44414            a,
44415            0b11111111_00000000,
44416            a,
44417        );
44418        let e = _mm512_setr_ps(
44419            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
44420        );
44421        assert_eq_m512(r, e);
44422    }
44423
44424    #[simd_test(enable = "avx512f")]
44425    unsafe fn test_mm512_maskz_getmant_ps() {
44426        let a = _mm512_set1_ps(10.);
44427        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
44428        assert_eq_m512(r, _mm512_setzero_ps());
44429        let r =
44430            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
44431        let e = _mm512_setr_ps(
44432            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
44433        );
44434        assert_eq_m512(r, e);
44435    }
44436
44437    #[simd_test(enable = "avx512f,avx512vl")]
44438    unsafe fn test_mm256_getmant_ps() {
44439        let a = _mm256_set1_ps(10.);
44440        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
44441        let e = _mm256_set1_ps(1.25);
44442        assert_eq_m256(r, e);
44443    }
44444
44445    #[simd_test(enable = "avx512f,avx512vl")]
44446    unsafe fn test_mm256_mask_getmant_ps() {
44447        let a = _mm256_set1_ps(10.);
44448        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
44449        assert_eq_m256(r, a);
44450        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
44451        let e = _mm256_set1_ps(1.25);
44452        assert_eq_m256(r, e);
44453    }
44454
44455    #[simd_test(enable = "avx512f,avx512vl")]
44456    unsafe fn test_mm256_maskz_getmant_ps() {
44457        let a = _mm256_set1_ps(10.);
44458        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
44459        assert_eq_m256(r, _mm256_setzero_ps());
44460        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
44461        let e = _mm256_set1_ps(1.25);
44462        assert_eq_m256(r, e);
44463    }
44464
44465    #[simd_test(enable = "avx512f,avx512vl")]
44466    unsafe fn test_mm_getmant_ps() {
44467        let a = _mm_set1_ps(10.);
44468        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
44469        let e = _mm_set1_ps(1.25);
44470        assert_eq_m128(r, e);
44471    }
44472
44473    #[simd_test(enable = "avx512f,avx512vl")]
44474    unsafe fn test_mm_mask_getmant_ps() {
44475        let a = _mm_set1_ps(10.);
44476        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
44477        assert_eq_m128(r, a);
44478        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
44479        let e = _mm_set1_ps(1.25);
44480        assert_eq_m128(r, e);
44481    }
44482
44483    #[simd_test(enable = "avx512f,avx512vl")]
44484    unsafe fn test_mm_maskz_getmant_ps() {
44485        let a = _mm_set1_ps(10.);
44486        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
44487        assert_eq_m128(r, _mm_setzero_ps());
44488        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
44489        let e = _mm_set1_ps(1.25);
44490        assert_eq_m128(r, e);
44491    }
44492
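    // Informal note on the *_round_ps tests below: the const argument selects a
    // static rounding mode with exceptions suppressed (SAE). 0.00000007 + (-1.0)
    // is not exactly representable as an f32, so round-to-nearest-even
    // (_MM_FROUND_TO_NEAREST_INT) produces -0.99999994 while round-toward-zero
    // (_MM_FROUND_TO_ZERO) produces the smaller-magnitude value -0.9999999.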
44493    #[simd_test(enable = "avx512f")]
44494    unsafe fn test_mm512_add_round_ps() {
44495        let a = _mm512_setr_ps(
44496            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44497        );
44498        let b = _mm512_set1_ps(-1.);
44499        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44500        #[rustfmt::skip]
44501        let e = _mm512_setr_ps(
44502            -1., 0.5, 1., 2.5,
44503            3., 4.5, 5., 6.5,
44504            7., 8.5, 9., 10.5,
44505            11., 12.5, 13., -0.99999994,
44506        );
44507        assert_eq_m512(r, e);
44508        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44509        let e = _mm512_setr_ps(
44510            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
44511        );
44512        assert_eq_m512(r, e);
44513    }
44514
44515    #[simd_test(enable = "avx512f")]
44516    unsafe fn test_mm512_mask_add_round_ps() {
44517        let a = _mm512_setr_ps(
44518            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44519        );
44520        let b = _mm512_set1_ps(-1.);
44521        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
44522        assert_eq_m512(r, a);
44523        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44524            a,
44525            0b11111111_00000000,
44526            a,
44527            b,
44528        );
44529        #[rustfmt::skip]
44530        let e = _mm512_setr_ps(
44531            0., 1.5, 2., 3.5,
44532            4., 5.5, 6., 7.5,
44533            7., 8.5, 9., 10.5,
44534            11., 12.5, 13., -0.99999994,
44535        );
44536        assert_eq_m512(r, e);
44537    }
44538
44539    #[simd_test(enable = "avx512f")]
44540    unsafe fn test_mm512_maskz_add_round_ps() {
44541        let a = _mm512_setr_ps(
44542            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44543        );
44544        let b = _mm512_set1_ps(-1.);
44545        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
44546        assert_eq_m512(r, _mm512_setzero_ps());
44547        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44548            0b11111111_00000000,
44549            a,
44550            b,
44551        );
44552        #[rustfmt::skip]
44553        let e = _mm512_setr_ps(
44554            0., 0., 0., 0.,
44555            0., 0., 0., 0.,
44556            7., 8.5, 9., 10.5,
44557            11., 12.5, 13., -0.99999994,
44558        );
44559        assert_eq_m512(r, e);
44560    }
44561
44562    #[simd_test(enable = "avx512f")]
44563    unsafe fn test_mm512_sub_round_ps() {
44564        let a = _mm512_setr_ps(
44565            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44566        );
44567        let b = _mm512_set1_ps(1.);
44568        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44569        #[rustfmt::skip]
44570        let e = _mm512_setr_ps(
44571            -1., 0.5, 1., 2.5,
44572            3., 4.5, 5., 6.5,
44573            7., 8.5, 9., 10.5,
44574            11., 12.5, 13., -0.99999994,
44575        );
44576        assert_eq_m512(r, e);
44577        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44578        let e = _mm512_setr_ps(
44579            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
44580        );
44581        assert_eq_m512(r, e);
44582    }
44583
44584    #[simd_test(enable = "avx512f")]
44585    unsafe fn test_mm512_mask_sub_round_ps() {
44586        let a = _mm512_setr_ps(
44587            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44588        );
44589        let b = _mm512_set1_ps(1.);
44590        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44591            a, 0, a, b,
44592        );
44593        assert_eq_m512(r, a);
44594        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44595            a,
44596            0b11111111_00000000,
44597            a,
44598            b,
44599        );
44600        #[rustfmt::skip]
44601        let e = _mm512_setr_ps(
44602            0., 1.5, 2., 3.5,
44603            4., 5.5, 6., 7.5,
44604            7., 8.5, 9., 10.5,
44605            11., 12.5, 13., -0.99999994,
44606        );
44607        assert_eq_m512(r, e);
44608    }
44609
44610    #[simd_test(enable = "avx512f")]
44611    unsafe fn test_mm512_maskz_sub_round_ps() {
44612        let a = _mm512_setr_ps(
44613            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
44614        );
44615        let b = _mm512_set1_ps(1.);
44616        let r =
44617            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
44618        assert_eq_m512(r, _mm512_setzero_ps());
44619        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44620            0b11111111_00000000,
44621            a,
44622            b,
44623        );
44624        #[rustfmt::skip]
44625        let e = _mm512_setr_ps(
44626            0., 0., 0., 0.,
44627            0., 0., 0., 0.,
44628            7., 8.5, 9., 10.5,
44629            11., 12.5, 13., -0.99999994,
44630        );
44631        assert_eq_m512(r, e);
44632    }
44633
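    // Informal note: 0.1 has no exact binary representation, so the products here
    // depend on the selected rounding mode, e.g. 1.5 * 0.1 rounds to 0.15 under
    // round-to-nearest but truncates to 0.14999999 under round-toward-zero.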
44634    #[simd_test(enable = "avx512f")]
44635    unsafe fn test_mm512_mul_round_ps() {
44636        #[rustfmt::skip]
44637        let a = _mm512_setr_ps(
44638            0., 1.5, 2., 3.5,
44639            4., 5.5, 6., 7.5,
44640            8., 9.5, 10., 11.5,
44641            12., 13.5, 14., 0.00000000000000000000007,
44642        );
44643        let b = _mm512_set1_ps(0.1);
44644        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44645        #[rustfmt::skip]
44646        let e = _mm512_setr_ps(
44647            0., 0.15, 0.2, 0.35,
44648            0.4, 0.55, 0.6, 0.75,
44649            0.8, 0.95, 1.0, 1.15,
44650            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
44651        );
44652        assert_eq_m512(r, e);
44653        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44654        #[rustfmt::skip]
44655        let e = _mm512_setr_ps(
44656            0., 0.14999999, 0.2, 0.35,
44657            0.4, 0.54999995, 0.59999996, 0.75,
44658            0.8, 0.95, 1.0, 1.15,
44659            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
44660        );
44661        assert_eq_m512(r, e);
44662    }
44663
44664    #[simd_test(enable = "avx512f")]
44665    unsafe fn test_mm512_mask_mul_round_ps() {
44666        #[rustfmt::skip]
44667        let a = _mm512_setr_ps(
44668            0., 1.5, 2., 3.5,
44669            4., 5.5, 6., 7.5,
44670            8., 9.5, 10., 11.5,
44671            12., 13.5, 14., 0.00000000000000000000007,
44672        );
44673        let b = _mm512_set1_ps(0.1);
44674        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44675            a, 0, a, b,
44676        );
44677        assert_eq_m512(r, a);
44678        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44679            a,
44680            0b11111111_00000000,
44681            a,
44682            b,
44683        );
44684        #[rustfmt::skip]
44685        let e = _mm512_setr_ps(
44686            0., 1.5, 2., 3.5,
44687            4., 5.5, 6., 7.5,
44688            0.8, 0.95, 1.0, 1.15,
44689            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
44690        );
44691        assert_eq_m512(r, e);
44692    }
44693
44694    #[simd_test(enable = "avx512f")]
44695    unsafe fn test_mm512_maskz_mul_round_ps() {
44696        #[rustfmt::skip]
44697        let a = _mm512_setr_ps(
44698            0., 1.5, 2., 3.5,
44699            4., 5.5, 6., 7.5,
44700            8., 9.5, 10., 11.5,
44701            12., 13.5, 14., 0.00000000000000000000007,
44702        );
44703        let b = _mm512_set1_ps(0.1);
44704        let r =
44705            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
44706        assert_eq_m512(r, _mm512_setzero_ps());
44707        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44708            0b11111111_00000000,
44709            a,
44710            b,
44711        );
44712        #[rustfmt::skip]
44713        let e = _mm512_setr_ps(
44714            0., 0., 0., 0.,
44715            0., 0., 0., 0.,
44716            0.8, 0.95, 1.0, 1.15,
44717            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
44718        );
44719        assert_eq_m512(r, e);
44720    }
44721
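    // Informal note: 1/3 is inexact in binary, so round-to-nearest gives
    // 0.33333334 and round-toward-zero gives 0.3333333.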
44722    #[simd_test(enable = "avx512f")]
44723    unsafe fn test_mm512_div_round_ps() {
44724        let a = _mm512_set1_ps(1.);
44725        let b = _mm512_set1_ps(3.);
44726        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
44727        let e = _mm512_set1_ps(0.33333334);
44728        assert_eq_m512(r, e);
44729        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
44730        let e = _mm512_set1_ps(0.3333333);
44731        assert_eq_m512(r, e);
44732    }
44733
44734    #[simd_test(enable = "avx512f")]
44735    unsafe fn test_mm512_mask_div_round_ps() {
44736        let a = _mm512_set1_ps(1.);
44737        let b = _mm512_set1_ps(3.);
44738        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44739            a, 0, a, b,
44740        );
44741        assert_eq_m512(r, a);
44742        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44743            a,
44744            0b11111111_00000000,
44745            a,
44746            b,
44747        );
44748        let e = _mm512_setr_ps(
44749            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
44750            0.33333334, 0.33333334, 0.33333334, 0.33333334,
44751        );
44752        assert_eq_m512(r, e);
44753    }
44754
44755    #[simd_test(enable = "avx512f")]
44756    unsafe fn test_mm512_maskz_div_round_ps() {
44757        let a = _mm512_set1_ps(1.);
44758        let b = _mm512_set1_ps(3.);
44759        let r =
44760            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
44761        assert_eq_m512(r, _mm512_setzero_ps());
44762        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44763            0b11111111_00000000,
44764            a,
44765            b,
44766        );
44767        let e = _mm512_setr_ps(
44768            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
44769            0.33333334, 0.33333334, 0.33333334, 0.33333334,
44770        );
44771        assert_eq_m512(r, e);
44772    }
44773
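    // Informal note: sqrt(3) ~= 1.7320508075...; round-to-nearest gives 1.7320508
    // and round-toward-positive-infinity gives the next representable f32,
    // 1.7320509.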
44774    #[simd_test(enable = "avx512f")]
44775    unsafe fn test_mm512_sqrt_round_ps() {
44776        let a = _mm512_set1_ps(3.);
44777        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
44778        let e = _mm512_set1_ps(1.7320508);
44779        assert_eq_m512(r, e);
44780        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
44781        let e = _mm512_set1_ps(1.7320509);
44782        assert_eq_m512(r, e);
44783    }
44784
44785    #[simd_test(enable = "avx512f")]
44786    unsafe fn test_mm512_mask_sqrt_round_ps() {
44787        let a = _mm512_set1_ps(3.);
44788        let r =
44789            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
44790        assert_eq_m512(r, a);
44791        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44792            a,
44793            0b11111111_00000000,
44794            a,
44795        );
44796        let e = _mm512_setr_ps(
44797            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
44798            1.7320508, 1.7320508, 1.7320508,
44799        );
44800        assert_eq_m512(r, e);
44801    }
44802
44803    #[simd_test(enable = "avx512f")]
44804    unsafe fn test_mm512_maskz_sqrt_round_ps() {
44805        let a = _mm512_set1_ps(3.);
44806        let r =
44807            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
44808        assert_eq_m512(r, _mm512_setzero_ps());
44809        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44810            0b11111111_00000000,
44811            a,
44812        );
44813        let e = _mm512_setr_ps(
44814            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
44815            1.7320508, 1.7320508, 1.7320508,
44816        );
44817        assert_eq_m512(r, e);
44818    }
44819
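    // Informal note on the fused multiply-add tests below: FMA computes a * b + c
    // with a single rounding at the end, so the static rounding mode is applied
    // once to the exact result 0.00000007 - 1.0 (nearest: -0.99999994, toward
    // zero: -0.9999999).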
44820    #[simd_test(enable = "avx512f")]
44821    unsafe fn test_mm512_fmadd_round_ps() {
44822        let a = _mm512_set1_ps(0.00000007);
44823        let b = _mm512_set1_ps(1.);
44824        let c = _mm512_set1_ps(-1.);
44825        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
44826        let e = _mm512_set1_ps(-0.99999994);
44827        assert_eq_m512(r, e);
44828        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
44829        let e = _mm512_set1_ps(-0.9999999);
44830        assert_eq_m512(r, e);
44831    }
44832
44833    #[simd_test(enable = "avx512f")]
44834    unsafe fn test_mm512_mask_fmadd_round_ps() {
44835        let a = _mm512_set1_ps(0.00000007);
44836        let b = _mm512_set1_ps(1.);
44837        let c = _mm512_set1_ps(-1.);
44838        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44839            a, 0, b, c,
44840        );
44841        assert_eq_m512(r, a);
44842        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44843            a,
44844            0b00000000_11111111,
44845            b,
44846            c,
44847        );
44848        #[rustfmt::skip]
44849        let e = _mm512_setr_ps(
44850            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44851            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44852            0.00000007, 0.00000007, 0.00000007, 0.00000007,
44853            0.00000007, 0.00000007, 0.00000007, 0.00000007,
44854        );
44855        assert_eq_m512(r, e);
44856    }
44857
44858    #[simd_test(enable = "avx512f")]
44859    unsafe fn test_mm512_maskz_fmadd_round_ps() {
44860        let a = _mm512_set1_ps(0.00000007);
44861        let b = _mm512_set1_ps(1.);
44862        let c = _mm512_set1_ps(-1.);
44863        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44864            0, a, b, c,
44865        );
44866        assert_eq_m512(r, _mm512_setzero_ps());
44867        #[rustfmt::skip]
44868        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44869            0b00000000_11111111,
44870            a,
44871            b,
44872            c,
44873        );
44874        #[rustfmt::skip]
44875        let e = _mm512_setr_ps(
44876            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44877            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44878            0., 0., 0., 0.,
44879            0., 0., 0., 0.,
44880        );
44881        assert_eq_m512(r, e);
44882    }
44883
44884    #[simd_test(enable = "avx512f")]
44885    unsafe fn test_mm512_mask3_fmadd_round_ps() {
44886        let a = _mm512_set1_ps(0.00000007);
44887        let b = _mm512_set1_ps(1.);
44888        let c = _mm512_set1_ps(-1.);
44889        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44890            a, b, c, 0,
44891        );
44892        assert_eq_m512(r, c);
44893        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44894            a,
44895            b,
44896            c,
44897            0b00000000_11111111,
44898        );
44899        #[rustfmt::skip]
44900        let e = _mm512_setr_ps(
44901            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44902            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44903            -1., -1., -1., -1.,
44904            -1., -1., -1., -1.,
44905        );
44906        assert_eq_m512(r, e);
44907    }
44908
44909    #[simd_test(enable = "avx512f")]
44910    unsafe fn test_mm512_fmsub_round_ps() {
44911        let a = _mm512_set1_ps(0.00000007);
44912        let b = _mm512_set1_ps(1.);
44913        let c = _mm512_set1_ps(1.);
44914        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
44915        let e = _mm512_set1_ps(-0.99999994);
44916        assert_eq_m512(r, e);
44917        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
44918        let e = _mm512_set1_ps(-0.9999999);
44919        assert_eq_m512(r, e);
44920    }
44921
44922    #[simd_test(enable = "avx512f")]
44923    unsafe fn test_mm512_mask_fmsub_round_ps() {
44924        let a = _mm512_set1_ps(0.00000007);
44925        let b = _mm512_set1_ps(1.);
44926        let c = _mm512_set1_ps(1.);
44927        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44928            a, 0, b, c,
44929        );
44930        assert_eq_m512(r, a);
44931        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44932            a,
44933            0b00000000_11111111,
44934            b,
44935            c,
44936        );
44937        #[rustfmt::skip]
44938        let e = _mm512_setr_ps(
44939            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44940            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44941            0.00000007, 0.00000007, 0.00000007, 0.00000007,
44942            0.00000007, 0.00000007, 0.00000007, 0.00000007,
44943        );
44944        assert_eq_m512(r, e);
44945    }
44946
44947    #[simd_test(enable = "avx512f")]
44948    unsafe fn test_mm512_maskz_fmsub_round_ps() {
44949        let a = _mm512_set1_ps(0.00000007);
44950        let b = _mm512_set1_ps(1.);
44951        let c = _mm512_set1_ps(1.);
44952        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44953            0, a, b, c,
44954        );
44955        assert_eq_m512(r, _mm512_setzero_ps());
44956        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44957            0b00000000_11111111,
44958            a,
44959            b,
44960            c,
44961        );
44962        #[rustfmt::skip]
44963        let e = _mm512_setr_ps(
44964            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44965            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44966            0., 0., 0., 0.,
44967            0., 0., 0., 0.,
44968        );
44969        assert_eq_m512(r, e);
44970    }
44971
44972    #[simd_test(enable = "avx512f")]
44973    unsafe fn test_mm512_mask3_fmsub_round_ps() {
44974        let a = _mm512_set1_ps(0.00000007);
44975        let b = _mm512_set1_ps(1.);
44976        let c = _mm512_set1_ps(1.);
44977        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44978            a, b, c, 0,
44979        );
44980        assert_eq_m512(r, c);
44981        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
44982            a,
44983            b,
44984            c,
44985            0b00000000_11111111,
44986        );
44987        #[rustfmt::skip]
44988        let e = _mm512_setr_ps(
44989            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44990            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
44991            1., 1., 1., 1.,
44992            1., 1., 1., 1.,
44993        );
44994        assert_eq_m512(r, e);
44995    }
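    // Informal note: fmaddsub subtracts c in even lanes and adds c in odd lanes
    // (fmsubadd is the reverse), which is why the expected vectors alternate
    // between 1.0000001 (a * b - c) and -0.99999994 (a * b + c).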
44996
44997    #[simd_test(enable = "avx512f")]
44998    unsafe fn test_mm512_fmaddsub_round_ps() {
44999        let a = _mm512_set1_ps(0.00000007);
45000        let b = _mm512_set1_ps(1.);
45001        let c = _mm512_set1_ps(-1.);
45002        let r =
45003            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45004        #[rustfmt::skip]
45005        let e = _mm512_setr_ps(
45006            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45007            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45008            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45009            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45010        );
45011        assert_eq_m512(r, e);
45012        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45013        let e = _mm512_setr_ps(
45014            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
45015            -0.9999999, 1., -0.9999999, 1., -0.9999999,
45016        );
45017        assert_eq_m512(r, e);
45018    }
45019
45020    #[simd_test(enable = "avx512f")]
45021    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
45022        let a = _mm512_set1_ps(0.00000007);
45023        let b = _mm512_set1_ps(1.);
45024        let c = _mm512_set1_ps(-1.);
45025        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45026            a, 0, b, c,
45027        );
45028        assert_eq_m512(r, a);
45029        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45030            a,
45031            0b00000000_11111111,
45032            b,
45033            c,
45034        );
45035        #[rustfmt::skip]
45036        let e = _mm512_setr_ps(
45037            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45038            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45039            0.00000007, 0.00000007, 0.00000007, 0.00000007,
45040            0.00000007, 0.00000007, 0.00000007, 0.00000007,
45041        );
45042        assert_eq_m512(r, e);
45043    }
45044
45045    #[simd_test(enable = "avx512f")]
45046    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
45047        let a = _mm512_set1_ps(0.00000007);
45048        let b = _mm512_set1_ps(1.);
45049        let c = _mm512_set1_ps(-1.);
45050        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45051            0, a, b, c,
45052        );
45053        assert_eq_m512(r, _mm512_setzero_ps());
45054        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45055            0b00000000_11111111,
45056            a,
45057            b,
45058            c,
45059        );
45060        #[rustfmt::skip]
45061        let e = _mm512_setr_ps(
45062            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45063            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45064            0., 0., 0., 0.,
45065            0., 0., 0., 0.,
45066        );
45067        assert_eq_m512(r, e);
45068    }
45069
45070    #[simd_test(enable = "avx512f")]
45071    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
45072        let a = _mm512_set1_ps(0.00000007);
45073        let b = _mm512_set1_ps(1.);
45074        let c = _mm512_set1_ps(-1.);
45075        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45076            a, b, c, 0,
45077        );
45078        assert_eq_m512(r, c);
45079        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45080            a,
45081            b,
45082            c,
45083            0b00000000_11111111,
45084        );
45085        #[rustfmt::skip]
45086        let e = _mm512_setr_ps(
45087            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45088            1.0000001, -0.99999994, 1.0000001, -0.99999994,
45089            -1., -1., -1., -1.,
45090            -1., -1., -1., -1.,
45091        );
45092        assert_eq_m512(r, e);
45093    }
45094
45095    #[simd_test(enable = "avx512f")]
45096    unsafe fn test_mm512_fmsubadd_round_ps() {
45097        let a = _mm512_set1_ps(0.00000007);
45098        let b = _mm512_set1_ps(1.);
45099        let c = _mm512_set1_ps(-1.);
45100        let r =
45101            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45102        #[rustfmt::skip]
45103        let e = _mm512_setr_ps(
45104            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45105            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45106            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45107            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45108        );
45109        assert_eq_m512(r, e);
45110        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45111        let e = _mm512_setr_ps(
45112            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
45113            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
45114        );
45115        assert_eq_m512(r, e);
45116    }
45117
45118    #[simd_test(enable = "avx512f")]
45119    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
45120        let a = _mm512_set1_ps(0.00000007);
45121        let b = _mm512_set1_ps(1.);
45122        let c = _mm512_set1_ps(-1.);
45123        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45124            a, 0, b, c,
45125        );
45126        assert_eq_m512(r, a);
45127        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45128            a,
45129            0b00000000_11111111,
45130            b,
45131            c,
45132        );
45133        #[rustfmt::skip]
45134        let e = _mm512_setr_ps(
45135            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45136            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45137            0.00000007, 0.00000007, 0.00000007, 0.00000007,
45138            0.00000007, 0.00000007, 0.00000007, 0.00000007,
45139        );
45140        assert_eq_m512(r, e);
45141    }
45142
45143    #[simd_test(enable = "avx512f")]
45144    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
45145        let a = _mm512_set1_ps(0.00000007);
45146        let b = _mm512_set1_ps(1.);
45147        let c = _mm512_set1_ps(-1.);
45148        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45149            0, a, b, c,
45150        );
45151        assert_eq_m512(r, _mm512_setzero_ps());
45152        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45153            0b00000000_11111111,
45154            a,
45155            b,
45156            c,
45157        );
45158        #[rustfmt::skip]
45159        let e = _mm512_setr_ps(
45160            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45161            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45162            0., 0., 0., 0.,
45163            0., 0., 0., 0.,
45164        );
45165        assert_eq_m512(r, e);
45166    }
45167
45168    #[simd_test(enable = "avx512f")]
45169    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
45170        let a = _mm512_set1_ps(0.00000007);
45171        let b = _mm512_set1_ps(1.);
45172        let c = _mm512_set1_ps(-1.);
45173        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45174            a, b, c, 0,
45175        );
45176        assert_eq_m512(r, c);
45177        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45178            a,
45179            b,
45180            c,
45181            0b00000000_11111111,
45182        );
45183        #[rustfmt::skip]
45184        let e = _mm512_setr_ps(
45185            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45186            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
45187            -1., -1., -1., -1.,
45188            -1., -1., -1., -1.,
45189        );
45190        assert_eq_m512(r, e);
45191    }
45192
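    // Informal note: fnmadd computes -(a * b) + c and fnmsub computes
    // -(a * b) - c, again with a single rounding, hence 0.99999994 under
    // round-to-nearest and 0.9999999 under round-toward-zero.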
45193    #[simd_test(enable = "avx512f")]
45194    unsafe fn test_mm512_fnmadd_round_ps() {
45195        let a = _mm512_set1_ps(0.00000007);
45196        let b = _mm512_set1_ps(1.);
45197        let c = _mm512_set1_ps(1.);
45198        let r =
45199            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45200        let e = _mm512_set1_ps(0.99999994);
45201        assert_eq_m512(r, e);
45202        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45203        let e = _mm512_set1_ps(0.9999999);
45204        assert_eq_m512(r, e);
45205    }
45206
45207    #[simd_test(enable = "avx512f")]
45208    unsafe fn test_mm512_mask_fnmadd_round_ps() {
45209        let a = _mm512_set1_ps(0.00000007);
45210        let b = _mm512_set1_ps(1.);
45211        let c = _mm512_set1_ps(1.);
45212        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45213            a, 0, b, c,
45214        );
45215        assert_eq_m512(r, a);
45216        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45217            a,
45218            0b00000000_11111111,
45219            b,
45220            c,
45221        );
45222        let e = _mm512_setr_ps(
45223            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45224            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45225            0.00000007, 0.00000007,
45226        );
45227        assert_eq_m512(r, e);
45228    }
45229
45230    #[simd_test(enable = "avx512f")]
45231    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
45232        let a = _mm512_set1_ps(0.00000007);
45233        let b = _mm512_set1_ps(1.);
45234        let c = _mm512_set1_ps(1.);
45235        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45236            0, a, b, c,
45237        );
45238        assert_eq_m512(r, _mm512_setzero_ps());
45239        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45240            0b00000000_11111111,
45241            a,
45242            b,
45243            c,
45244        );
45245        let e = _mm512_setr_ps(
45246            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45247            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
45248        );
45249        assert_eq_m512(r, e);
45250    }
45251
45252    #[simd_test(enable = "avx512f")]
45253    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
45254        let a = _mm512_set1_ps(0.00000007);
45255        let b = _mm512_set1_ps(1.);
45256        let c = _mm512_set1_ps(1.);
45257        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45258            a, b, c, 0,
45259        );
45260        assert_eq_m512(r, c);
45261        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45262            a,
45263            b,
45264            c,
45265            0b00000000_11111111,
45266        );
45267        let e = _mm512_setr_ps(
45268            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45269            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
45270        );
45271        assert_eq_m512(r, e);
45272    }
45273
45274    #[simd_test(enable = "avx512f")]
45275    unsafe fn test_mm512_fnmsub_round_ps() {
45276        let a = _mm512_set1_ps(0.00000007);
45277        let b = _mm512_set1_ps(1.);
45278        let c = _mm512_set1_ps(-1.);
45279        let r =
45280            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
45281        let e = _mm512_set1_ps(0.99999994);
45282        assert_eq_m512(r, e);
45283        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
45284        let e = _mm512_set1_ps(0.9999999);
45285        assert_eq_m512(r, e);
45286    }
45287
45288    #[simd_test(enable = "avx512f")]
45289    unsafe fn test_mm512_mask_fnmsub_round_ps() {
45290        let a = _mm512_set1_ps(0.00000007);
45291        let b = _mm512_set1_ps(1.);
45292        let c = _mm512_set1_ps(-1.);
45293        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45294            a, 0, b, c,
45295        );
45296        assert_eq_m512(r, a);
45297        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45298            a,
45299            0b00000000_11111111,
45300            b,
45301            c,
45302        );
45303        let e = _mm512_setr_ps(
45304            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45305            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
45306            0.00000007, 0.00000007,
45307        );
45308        assert_eq_m512(r, e);
45309    }
45310
45311    #[simd_test(enable = "avx512f")]
45312    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
45313        let a = _mm512_set1_ps(0.00000007);
45314        let b = _mm512_set1_ps(1.);
45315        let c = _mm512_set1_ps(-1.);
45316        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45317            0, a, b, c,
45318        );
45319        assert_eq_m512(r, _mm512_setzero_ps());
45320        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45321            0b00000000_11111111,
45322            a,
45323            b,
45324            c,
45325        );
45326        let e = _mm512_setr_ps(
45327            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45328            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
45329        );
45330        assert_eq_m512(r, e);
45331    }
45332
45333    #[simd_test(enable = "avx512f")]
45334    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
45335        let a = _mm512_set1_ps(0.00000007);
45336        let b = _mm512_set1_ps(1.);
45337        let c = _mm512_set1_ps(-1.);
45338        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45339            a, b, c, 0,
45340        );
45341        assert_eq_m512(r, c);
45342        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45343            a,
45344            b,
45345            c,
45346            0b00000000_11111111,
45347        );
45348        let e = _mm512_setr_ps(
45349            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
45350            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
45351        );
45352        assert_eq_m512(r, e);
45353    }
45354
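    // Informal note: max/min are exact operations, so _MM_FROUND_CUR_DIRECTION
    // (use the current MXCSR rounding state) does not affect the expected
    // lane-wise results below.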
45355    #[simd_test(enable = "avx512f")]
45356    unsafe fn test_mm512_max_round_ps() {
45357        let a = _mm512_setr_ps(
45358            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45359        );
45360        let b = _mm512_setr_ps(
45361            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45362        );
45363        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
45364        let e = _mm512_setr_ps(
45365            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
45366        );
45367        assert_eq_m512(r, e);
45368    }
45369
45370    #[simd_test(enable = "avx512f")]
45371    unsafe fn test_mm512_mask_max_round_ps() {
45372        let a = _mm512_setr_ps(
45373            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45374        );
45375        let b = _mm512_setr_ps(
45376            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45377        );
45378        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
45379        assert_eq_m512(r, a);
45380        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
45381        let e = _mm512_setr_ps(
45382            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
45383        );
45384        assert_eq_m512(r, e);
45385    }
45386
45387    #[simd_test(enable = "avx512f")]
45388    unsafe fn test_mm512_maskz_max_round_ps() {
45389        let a = _mm512_setr_ps(
45390            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45391        );
45392        let b = _mm512_setr_ps(
45393            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45394        );
45395        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
45396        assert_eq_m512(r, _mm512_setzero_ps());
45397        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
45398        let e = _mm512_setr_ps(
45399            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45400        );
45401        assert_eq_m512(r, e);
45402    }
45403
45404    #[simd_test(enable = "avx512f")]
45405    unsafe fn test_mm512_min_round_ps() {
45406        let a = _mm512_setr_ps(
45407            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45408        );
45409        let b = _mm512_setr_ps(
45410            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45411        );
45412        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
45413        let e = _mm512_setr_ps(
45414            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
45415        );
45416        assert_eq_m512(r, e);
45417    }
45418
45419    #[simd_test(enable = "avx512f")]
45420    unsafe fn test_mm512_mask_min_round_ps() {
45421        let a = _mm512_setr_ps(
45422            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45423        );
45424        let b = _mm512_setr_ps(
45425            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45426        );
45427        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
45428        assert_eq_m512(r, a);
45429        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
45430        let e = _mm512_setr_ps(
45431            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45432        );
45433        assert_eq_m512(r, e);
45434    }
45435
45436    #[simd_test(enable = "avx512f")]
45437    unsafe fn test_mm512_maskz_min_round_ps() {
45438        let a = _mm512_setr_ps(
45439            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45440        );
45441        let b = _mm512_setr_ps(
45442            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
45443        );
45444        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
45445        assert_eq_m512(r, _mm512_setzero_ps());
45446        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
45447        let e = _mm512_setr_ps(
45448            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
45449        );
45450        assert_eq_m512(r, e);
45451    }
45452
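    // Informal note: getexp returns floor(log2(|x|)) as a float, i.e. the
    // unbiased binary exponent for normal inputs; 3.0 == 1.5 * 2^1 gives 1.0.
    // A hypothetical scalar sketch for normal, positive inputs (illustrative
    // only, not used by the tests):
    #[allow(dead_code)]
    fn getexp_normal_model(x: f32) -> f32 {
        // Extract the biased exponent field and remove the bias of 127.
        (((x.to_bits() >> 23) & 0xff) as i32 - 127) as f32
    }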
45453    #[simd_test(enable = "avx512f")]
45454    unsafe fn test_mm512_getexp_round_ps() {
45455        let a = _mm512_set1_ps(3.);
45456        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
45457        let e = _mm512_set1_ps(1.);
45458        assert_eq_m512(r, e);
45459    }
45460
45461    #[simd_test(enable = "avx512f")]
45462    unsafe fn test_mm512_mask_getexp_round_ps() {
45463        let a = _mm512_set1_ps(3.);
45464        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
45465        assert_eq_m512(r, a);
45466        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
45467        let e = _mm512_setr_ps(
45468            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
45469        );
45470        assert_eq_m512(r, e);
45471    }
45472
45473    #[simd_test(enable = "avx512f")]
45474    unsafe fn test_mm512_maskz_getexp_round_ps() {
45475        let a = _mm512_set1_ps(3.);
45476        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
45477        assert_eq_m512(r, _mm512_setzero_ps());
45478        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
45479        let e = _mm512_setr_ps(
45480            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45481        );
45482        assert_eq_m512(r, e);
45483    }
45484
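    // Informal note: roundscale keeps IMM8[7:4] fraction bits; with an immediate
    // of 0 it rounds to the nearest integer, so 1.1 becomes 1.0.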
45485    #[simd_test(enable = "avx512f")]
45486    unsafe fn test_mm512_roundscale_round_ps() {
45487        let a = _mm512_set1_ps(1.1);
45488        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
45489        let e = _mm512_set1_ps(1.0);
45490        assert_eq_m512(r, e);
45491    }
45492
45493    #[simd_test(enable = "avx512f")]
45494    unsafe fn test_mm512_mask_roundscale_round_ps() {
45495        let a = _mm512_set1_ps(1.1);
45496        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
45497        let e = _mm512_set1_ps(1.1);
45498        assert_eq_m512(r, e);
45499        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
45500            a,
45501            0b11111111_11111111,
45502            a,
45503        );
45504        let e = _mm512_set1_ps(1.0);
45505        assert_eq_m512(r, e);
45506    }
45507
45508    #[simd_test(enable = "avx512f")]
45509    unsafe fn test_mm512_maskz_roundscale_round_ps() {
45510        let a = _mm512_set1_ps(1.1);
45511        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
45512        assert_eq_m512(r, _mm512_setzero_ps());
45513        let r =
45514            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
45515        let e = _mm512_set1_ps(1.0);
45516        assert_eq_m512(r, e);
45517    }
45518
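    // Informal note: scalef computes a * 2^floor(b), so 1.0 scaled by 3.0 is 8.0.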
45519    #[simd_test(enable = "avx512f")]
45520    unsafe fn test_mm512_scalef_round_ps() {
45521        let a = _mm512_set1_ps(1.);
45522        let b = _mm512_set1_ps(3.);
45523        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
45524        let e = _mm512_set1_ps(8.);
45525        assert_eq_m512(r, e);
45526    }
45527
45528    #[simd_test(enable = "avx512f")]
45529    unsafe fn test_mm512_mask_scalef_round_ps() {
45530        let a = _mm512_set1_ps(1.);
45531        let b = _mm512_set1_ps(3.);
45532        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45533            a, 0, a, b,
45534        );
45535        assert_eq_m512(r, a);
45536        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45537            a,
45538            0b11111111_00000000,
45539            a,
45540            b,
45541        );
45542        let e = _mm512_set_ps(
45543            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45544        );
45545        assert_eq_m512(r, e);
45546    }
45547
45548    #[simd_test(enable = "avx512f")]
45549    unsafe fn test_mm512_maskz_scalef_round_ps() {
45550        let a = _mm512_set1_ps(1.);
45551        let b = _mm512_set1_ps(3.);
45552        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45553            0, a, b,
45554        );
45555        assert_eq_m512(r, _mm512_setzero_ps());
45556        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
45557            0b11111111_00000000,
45558            a,
45559            b,
45560        );
45561        let e = _mm512_set_ps(
45562            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45563        );
45564        assert_eq_m512(r, e);
45565    }
45566
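    // Informal note (loosely summarized; see Intel's documentation for the exact
    // token/response encoding): fixupimm classifies each input element and uses
    // the matching 4-bit field of the table operand `c` to pick a replacement
    // value; the immediate mainly controls exception reporting. With the table
    // c = i32::MAX used here, the selected lanes come out as 0.0 in these tests.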
45567    #[simd_test(enable = "avx512f")]
45568    unsafe fn test_mm512_fixupimm_round_ps() {
45569        let a = _mm512_set1_ps(f32::NAN);
45570        let b = _mm512_set1_ps(f32::MAX);
45571        let c = _mm512_set1_epi32(i32::MAX);
45572        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
45573        let e = _mm512_set1_ps(0.0);
45574        assert_eq_m512(r, e);
45575    }
45576
45577    #[simd_test(enable = "avx512f")]
45578    unsafe fn test_mm512_mask_fixupimm_round_ps() {
45579        #[rustfmt::skip]
45580        let a = _mm512_set_ps(
45581            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45582            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45583            1., 1., 1., 1.,
45584            1., 1., 1., 1.,
45585        );
45586        let b = _mm512_set1_ps(f32::MAX);
45587        let c = _mm512_set1_epi32(i32::MAX);
45588        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
45589            a,
45590            0b11111111_00000000,
45591            b,
45592            c,
45593        );
45594        let e = _mm512_set_ps(
45595            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45596        );
45597        assert_eq_m512(r, e);
45598    }
45599
45600    #[simd_test(enable = "avx512f")]
45601    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
45602        #[rustfmt::skip]
45603        let a = _mm512_set_ps(
45604            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45605            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
45606            1., 1., 1., 1.,
45607            1., 1., 1., 1.,
45608        );
45609        let b = _mm512_set1_ps(f32::MAX);
45610        let c = _mm512_set1_epi32(i32::MAX);
45611        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
45612            0b11111111_00000000,
45613            a,
45614            b,
45615            c,
45616        );
45617        let e = _mm512_set_ps(
45618            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
45619        );
45620        assert_eq_m512(r, e);
45621    }
45622
45623    #[simd_test(enable = "avx512f")]
45624    unsafe fn test_mm512_getmant_round_ps() {
45625        let a = _mm512_set1_ps(10.);
45626        let r = _mm512_getmant_round_ps::<
45627            _MM_MANT_NORM_1_2,
45628            _MM_MANT_SIGN_SRC,
45629            _MM_FROUND_CUR_DIRECTION,
45630        >(a);
45631        let e = _mm512_set1_ps(1.25);
45632        assert_eq_m512(r, e);
45633    }
45634
45635    #[simd_test(enable = "avx512f")]
45636    unsafe fn test_mm512_mask_getmant_round_ps() {
45637        let a = _mm512_set1_ps(10.);
45638        let r = _mm512_mask_getmant_round_ps::<
45639            _MM_MANT_NORM_1_2,
45640            _MM_MANT_SIGN_SRC,
45641            _MM_FROUND_CUR_DIRECTION,
45642        >(a, 0, a);
45643        assert_eq_m512(r, a);
45644        let r = _mm512_mask_getmant_round_ps::<
45645            _MM_MANT_NORM_1_2,
45646            _MM_MANT_SIGN_SRC,
45647            _MM_FROUND_CUR_DIRECTION,
45648        >(a, 0b11111111_00000000, a);
45649        let e = _mm512_setr_ps(
45650            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
45651        );
45652        assert_eq_m512(r, e);
45653    }
45654
45655    #[simd_test(enable = "avx512f")]
45656    unsafe fn test_mm512_maskz_getmant_round_ps() {
45657        let a = _mm512_set1_ps(10.);
45658        let r = _mm512_maskz_getmant_round_ps::<
45659            _MM_MANT_NORM_1_2,
45660            _MM_MANT_SIGN_SRC,
45661            _MM_FROUND_CUR_DIRECTION,
45662        >(0, a);
45663        assert_eq_m512(r, _mm512_setzero_ps());
45664        let r = _mm512_maskz_getmant_round_ps::<
45665            _MM_MANT_NORM_1_2,
45666            _MM_MANT_SIGN_SRC,
45667            _MM_FROUND_CUR_DIRECTION,
45668        >(0b11111111_00000000, a);
45669        let e = _mm512_setr_ps(
45670            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
45671        );
45672        assert_eq_m512(r, e);
45673    }
45674
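    // Informal note: without an explicit rounding override the conversion uses the
    // default MXCSR mode, round-to-nearest-even, so halfway cases go to the even
    // integer: -3.5 -> -4, 9.5 -> 10, 13.5 -> 14, 15.5 -> 16.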
45675    #[simd_test(enable = "avx512f")]
45676    unsafe fn test_mm512_cvtps_epi32() {
45677        let a = _mm512_setr_ps(
45678            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45679        );
45680        let r = _mm512_cvtps_epi32(a);
45681        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
45682        assert_eq_m512i(r, e);
45683    }
45684
45685    #[simd_test(enable = "avx512f")]
45686    unsafe fn test_mm512_mask_cvtps_epi32() {
45687        let a = _mm512_setr_ps(
45688            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45689        );
45690        let src = _mm512_set1_epi32(0);
45691        let r = _mm512_mask_cvtps_epi32(src, 0, a);
45692        assert_eq_m512i(r, src);
45693        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
45694        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
45695        assert_eq_m512i(r, e);
45696    }
45697
45698    #[simd_test(enable = "avx512f")]
45699    unsafe fn test_mm512_maskz_cvtps_epi32() {
45700        let a = _mm512_setr_ps(
45701            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45702        );
45703        let r = _mm512_maskz_cvtps_epi32(0, a);
45704        assert_eq_m512i(r, _mm512_setzero_si512());
45705        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
45706        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
45707        assert_eq_m512i(r, e);
45708    }
45709
45710    #[simd_test(enable = "avx512f,avx512vl")]
45711    unsafe fn test_mm256_mask_cvtps_epi32() {
45712        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45713        let src = _mm256_set1_epi32(0);
45714        let r = _mm256_mask_cvtps_epi32(src, 0, a);
45715        assert_eq_m256i(r, src);
45716        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
45717        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45718        assert_eq_m256i(r, e);
45719    }
45720
45721    #[simd_test(enable = "avx512f,avx512vl")]
45722    unsafe fn test_mm256_maskz_cvtps_epi32() {
45723        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45724        let r = _mm256_maskz_cvtps_epi32(0, a);
45725        assert_eq_m256i(r, _mm256_setzero_si256());
45726        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
45727        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45728        assert_eq_m256i(r, e);
45729    }
45730
45731    #[simd_test(enable = "avx512f,avx512vl")]
45732    unsafe fn test_mm_mask_cvtps_epi32() {
45733        let a = _mm_set_ps(12., 13.5, 14., 15.5);
45734        let src = _mm_set1_epi32(0);
45735        let r = _mm_mask_cvtps_epi32(src, 0, a);
45736        assert_eq_m128i(r, src);
45737        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
45738        let e = _mm_set_epi32(12, 14, 14, 16);
45739        assert_eq_m128i(r, e);
45740    }
45741
45742    #[simd_test(enable = "avx512f,avx512vl")]
45743    unsafe fn test_mm_maskz_cvtps_epi32() {
45744        let a = _mm_set_ps(12., 13.5, 14., 15.5);
45745        let r = _mm_maskz_cvtps_epi32(0, a);
45746        assert_eq_m128i(r, _mm_setzero_si128());
45747        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
45748        let e = _mm_set_epi32(12, 14, 14, 16);
45749        assert_eq_m128i(r, e);
45750    }
45751
45752    #[simd_test(enable = "avx512f")]
45753    unsafe fn test_mm512_cvtps_epu32() {
45754        let a = _mm512_setr_ps(
45755            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45756        );
45757        let r = _mm512_cvtps_epu32(a);
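        // Negative inputs are out of range for the unsigned conversion; the hardware returns the
        // all-ones pattern 0xFFFF_FFFF, which is -1 when interpreted as i32 below.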
45758        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
45759        assert_eq_m512i(r, e);
45760    }
45761
45762    #[simd_test(enable = "avx512f")]
45763    unsafe fn test_mm512_mask_cvtps_epu32() {
45764        let a = _mm512_setr_ps(
45765            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45766        );
45767        let src = _mm512_set1_epi32(0);
45768        let r = _mm512_mask_cvtps_epu32(src, 0, a);
45769        assert_eq_m512i(r, src);
45770        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
45771        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
45772        assert_eq_m512i(r, e);
45773    }
45774
45775    #[simd_test(enable = "avx512f")]
45776    unsafe fn test_mm512_maskz_cvtps_epu32() {
45777        let a = _mm512_setr_ps(
45778            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
45779        );
45780        let r = _mm512_maskz_cvtps_epu32(0, a);
45781        assert_eq_m512i(r, _mm512_setzero_si512());
45782        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
45783        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
45784        assert_eq_m512i(r, e);
45785    }
45786
45787    #[simd_test(enable = "avx512f,avx512vl")]
45788    unsafe fn test_mm256_cvtps_epu32() {
45789        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45790        let r = _mm256_cvtps_epu32(a);
45791        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45792        assert_eq_m256i(r, e);
45793    }
45794
45795    #[simd_test(enable = "avx512f,avx512vl")]
45796    unsafe fn test_mm256_mask_cvtps_epu32() {
45797        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45798        let src = _mm256_set1_epi32(0);
45799        let r = _mm256_mask_cvtps_epu32(src, 0, a);
45800        assert_eq_m256i(r, src);
45801        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
45802        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45803        assert_eq_m256i(r, e);
45804    }
45805
45806    #[simd_test(enable = "avx512f,avx512vl")]
45807    unsafe fn test_mm256_maskz_cvtps_epu32() {
45808        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
45809        let r = _mm256_maskz_cvtps_epu32(0, a);
45810        assert_eq_m256i(r, _mm256_setzero_si256());
45811        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
45812        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
45813        assert_eq_m256i(r, e);
45814    }
45815
45816    #[simd_test(enable = "avx512f,avx512vl")]
45817    unsafe fn test_mm_cvtps_epu32() {
45818        let a = _mm_set_ps(12., 13.5, 14., 15.5);
45819        let r = _mm_cvtps_epu32(a);
45820        let e = _mm_set_epi32(12, 14, 14, 16);
45821        assert_eq_m128i(r, e);
45822    }
45823
45824    #[simd_test(enable = "avx512f,avx512vl")]
45825    unsafe fn test_mm_mask_cvtps_epu32() {
45826        let a = _mm_set_ps(12., 13.5, 14., 15.5);
45827        let src = _mm_set1_epi32(0);
45828        let r = _mm_mask_cvtps_epu32(src, 0, a);
45829        assert_eq_m128i(r, src);
45830        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
45831        let e = _mm_set_epi32(12, 14, 14, 16);
45832        assert_eq_m128i(r, e);
45833    }
45834
45835    #[simd_test(enable = "avx512f,avx512vl")]
45836    unsafe fn test_mm_maskz_cvtps_epu32() {
45837        let a = _mm_set_ps(12., 13.5, 14., 15.5);
45838        let r = _mm_maskz_cvtps_epu32(0, a);
45839        assert_eq_m128i(r, _mm_setzero_si128());
45840        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
45841        let e = _mm_set_epi32(12, 14, 14, 16);
45842        assert_eq_m128i(r, e);
45843    }
45844
45845    #[simd_test(enable = "avx512f")]
45846    unsafe fn test_mm512_cvtepi8_epi32() {
45847        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45848        let r = _mm512_cvtepi8_epi32(a);
45849        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45850        assert_eq_m512i(r, e);
45851    }
45852
45853    #[simd_test(enable = "avx512f")]
45854    unsafe fn test_mm512_mask_cvtepi8_epi32() {
45855        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45856        let src = _mm512_set1_epi32(-1);
45857        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
45858        assert_eq_m512i(r, src);
45859        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
45860        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
45861        assert_eq_m512i(r, e);
45862    }
45863
45864    #[simd_test(enable = "avx512f")]
45865    unsafe fn test_mm512_maskz_cvtepi8_epi32() {
45866        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45867        let r = _mm512_maskz_cvtepi8_epi32(0, a);
45868        assert_eq_m512i(r, _mm512_setzero_si512());
45869        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
45870        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
45871        assert_eq_m512i(r, e);
45872    }
45873
45874    #[simd_test(enable = "avx512f,avx512vl")]
45875    unsafe fn test_mm256_mask_cvtepi8_epi32() {
45876        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45877        let src = _mm256_set1_epi32(-1);
45878        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
45879        assert_eq_m256i(r, src);
45880        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
45881        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
45882        assert_eq_m256i(r, e);
45883    }
45884
45885    #[simd_test(enable = "avx512f,avx512vl")]
45886    unsafe fn test_mm256_maskz_cvtepi8_epi32() {
45887        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45888        let r = _mm256_maskz_cvtepi8_epi32(0, a);
45889        assert_eq_m256i(r, _mm256_setzero_si256());
45890        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
45891        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
45892        assert_eq_m256i(r, e);
45893    }
45894
45895    #[simd_test(enable = "avx512f,avx512vl")]
45896    unsafe fn test_mm_mask_cvtepi8_epi32() {
45897        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45898        let src = _mm_set1_epi32(-1);
45899        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
45900        assert_eq_m128i(r, src);
45901        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
45902        let e = _mm_set_epi32(12, 13, 14, 15);
45903        assert_eq_m128i(r, e);
45904    }
45905
45906    #[simd_test(enable = "avx512f,avx512vl")]
45907    unsafe fn test_mm_maskz_cvtepi8_epi32() {
45908        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45909        let r = _mm_maskz_cvtepi8_epi32(0, a);
45910        assert_eq_m128i(r, _mm_setzero_si128());
45911        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
45912        let e = _mm_set_epi32(12, 13, 14, 15);
45913        assert_eq_m128i(r, e);
45914    }
45915
45916    #[simd_test(enable = "avx512f")]
45917    unsafe fn test_mm512_cvtepu8_epi32() {
45918        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45919        let r = _mm512_cvtepu8_epi32(a);
45920        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45921        assert_eq_m512i(r, e);
45922    }
45923
45924    #[simd_test(enable = "avx512f")]
45925    unsafe fn test_mm512_mask_cvtepu8_epi32() {
45926        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45927        let src = _mm512_set1_epi32(-1);
45928        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
45929        assert_eq_m512i(r, src);
45930        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
45931        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
45932        assert_eq_m512i(r, e);
45933    }
45934
45935    #[simd_test(enable = "avx512f")]
45936    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
45937        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45938        let r = _mm512_maskz_cvtepu8_epi32(0, a);
45939        assert_eq_m512i(r, _mm512_setzero_si512());
45940        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
45941        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
45942        assert_eq_m512i(r, e);
45943    }
45944
45945    #[simd_test(enable = "avx512f,avx512vl")]
45946    unsafe fn test_mm256_mask_cvtepu8_epi32() {
45947        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45948        let src = _mm256_set1_epi32(-1);
45949        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
45950        assert_eq_m256i(r, src);
45951        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
45952        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
45953        assert_eq_m256i(r, e);
45954    }
45955
45956    #[simd_test(enable = "avx512f,avx512vl")]
45957    unsafe fn test_mm256_maskz_cvtepu8_epi32() {
45958        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45959        let r = _mm256_maskz_cvtepu8_epi32(0, a);
45960        assert_eq_m256i(r, _mm256_setzero_si256());
45961        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
45962        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
45963        assert_eq_m256i(r, e);
45964    }
45965
45966    #[simd_test(enable = "avx512f,avx512vl")]
45967    unsafe fn test_mm_mask_cvtepu8_epi32() {
45968        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45969        let src = _mm_set1_epi32(-1);
45970        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
45971        assert_eq_m128i(r, src);
45972        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
45973        let e = _mm_set_epi32(12, 13, 14, 15);
45974        assert_eq_m128i(r, e);
45975    }
45976
45977    #[simd_test(enable = "avx512f,avx512vl")]
45978    unsafe fn test_mm_maskz_cvtepu8_epi32() {
45979        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45980        let r = _mm_maskz_cvtepu8_epi32(0, a);
45981        assert_eq_m128i(r, _mm_setzero_si128());
45982        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
45983        let e = _mm_set_epi32(12, 13, 14, 15);
45984        assert_eq_m128i(r, e);
45985    }
45986
45987    #[simd_test(enable = "avx512f")]
45988    unsafe fn test_mm512_cvtepi16_epi32() {
45989        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45990        let r = _mm512_cvtepi16_epi32(a);
45991        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45992        assert_eq_m512i(r, e);
45993    }
45994
45995    #[simd_test(enable = "avx512f")]
45996    unsafe fn test_mm512_mask_cvtepi16_epi32() {
45997        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
45998        let src = _mm512_set1_epi32(-1);
45999        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
46000        assert_eq_m512i(r, src);
46001        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
46002        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46003        assert_eq_m512i(r, e);
46004    }
46005
46006    #[simd_test(enable = "avx512f")]
46007    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
46008        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46009        let r = _mm512_maskz_cvtepi16_epi32(0, a);
46010        assert_eq_m512i(r, _mm512_setzero_si512());
46011        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
46012        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46013        assert_eq_m512i(r, e);
46014    }
46015
46016    #[simd_test(enable = "avx512f,avx512vl")]
46017    unsafe fn test_mm256_mask_cvtepi16_epi32() {
46018        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46019        let src = _mm256_set1_epi32(-1);
46020        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
46021        assert_eq_m256i(r, src);
46022        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
46023        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46024        assert_eq_m256i(r, e);
46025    }
46026
46027    #[simd_test(enable = "avx512f,avx512vl")]
46028    unsafe fn test_mm256_maskz_cvtepi16_epi32() {
46029        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46030        let r = _mm256_maskz_cvtepi16_epi32(0, a);
46031        assert_eq_m256i(r, _mm256_setzero_si256());
46032        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
46033        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46034        assert_eq_m256i(r, e);
46035    }
46036
46037    #[simd_test(enable = "avx512f,avx512vl")]
46038    unsafe fn test_mm_mask_cvtepi16_epi32() {
46039        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46040        let src = _mm_set1_epi32(-1);
46041        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
46042        assert_eq_m128i(r, src);
46043        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
46044        let e = _mm_set_epi32(4, 5, 6, 7);
46045        assert_eq_m128i(r, e);
46046    }
46047
46048    #[simd_test(enable = "avx512f,avx512vl")]
46049    unsafe fn test_mm_maskz_cvtepi16_epi32() {
46050        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46051        let r = _mm_maskz_cvtepi16_epi32(0, a);
46052        assert_eq_m128i(r, _mm_setzero_si128());
46053        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
46054        let e = _mm_set_epi32(4, 5, 6, 7);
46055        assert_eq_m128i(r, e);
46056    }
46057
46058    #[simd_test(enable = "avx512f")]
46059    unsafe fn test_mm512_cvtepu16_epi32() {
46060        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46061        let r = _mm512_cvtepu16_epi32(a);
46062        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46063        assert_eq_m512i(r, e);
46064    }
46065
46066    #[simd_test(enable = "avx512f")]
46067    unsafe fn test_mm512_mask_cvtepu16_epi32() {
46068        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46069        let src = _mm512_set1_epi32(-1);
46070        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
46071        assert_eq_m512i(r, src);
46072        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
46073        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46074        assert_eq_m512i(r, e);
46075    }
46076
46077    #[simd_test(enable = "avx512f")]
46078    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
46079        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46080        let r = _mm512_maskz_cvtepu16_epi32(0, a);
46081        assert_eq_m512i(r, _mm512_setzero_si512());
46082        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
46083        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46084        assert_eq_m512i(r, e);
46085    }
46086
46087    #[simd_test(enable = "avx512f,avx512vl")]
46088    unsafe fn test_mm256_mask_cvtepu16_epi32() {
46089        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46090        let src = _mm256_set1_epi32(-1);
46091        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
46092        assert_eq_m256i(r, src);
46093        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
46094        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46095        assert_eq_m256i(r, e);
46096    }
46097
46098    #[simd_test(enable = "avx512f,avx512vl")]
46099    unsafe fn test_mm256_maskz_cvtepu16_epi32() {
46100        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46101        let r = _mm256_maskz_cvtepu16_epi32(0, a);
46102        assert_eq_m256i(r, _mm256_setzero_si256());
46103        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
46104        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
46105        assert_eq_m256i(r, e);
46106    }
46107
46108    #[simd_test(enable = "avx512f,avx512vl")]
46109    unsafe fn test_mm_mask_cvtepu16_epi32() {
46110        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46111        let src = _mm_set1_epi32(-1);
46112        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
46113        assert_eq_m128i(r, src);
46114        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
46115        let e = _mm_set_epi32(12, 13, 14, 15);
46116        assert_eq_m128i(r, e);
46117    }
46118
46119    #[simd_test(enable = "avx512f,avx512vl")]
46120    unsafe fn test_mm_maskz_cvtepu16_epi32() {
46121        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
46122        let r = _mm_maskz_cvtepu16_epi32(0, a);
46123        assert_eq_m128i(r, _mm_setzero_si128());
46124        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
46125        let e = _mm_set_epi32(12, 13, 14, 15);
46126        assert_eq_m128i(r, e);
46127    }
46128
46129    #[simd_test(enable = "avx512f")]
46130    unsafe fn test_mm512_cvtepi32_ps() {
46131        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46132        let r = _mm512_cvtepi32_ps(a);
46133        let e = _mm512_set_ps(
46134            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46135        );
46136        assert_eq_m512(r, e);
46137    }
46138
46139    #[simd_test(enable = "avx512f")]
46140    unsafe fn test_mm512_mask_cvtepi32_ps() {
46141        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46142        let src = _mm512_set1_ps(-1.);
46143        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
46144        assert_eq_m512(r, src);
46145        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
46146        let e = _mm512_set_ps(
46147            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
46148        );
46149        assert_eq_m512(r, e);
46150    }
46151
46152    #[simd_test(enable = "avx512f")]
46153    unsafe fn test_mm512_maskz_cvtepi32_ps() {
46154        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46155        let r = _mm512_maskz_cvtepi32_ps(0, a);
46156        assert_eq_m512(r, _mm512_setzero_ps());
46157        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
46158        let e = _mm512_set_ps(
46159            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
46160        );
46161        assert_eq_m512(r, e);
46162    }
46163
46164    #[simd_test(enable = "avx512f,avx512vl")]
46165    unsafe fn test_mm256_mask_cvtepi32_ps() {
46166        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46167        let src = _mm256_set1_ps(-1.);
46168        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
46169        assert_eq_m256(r, src);
46170        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
46171        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
46172        assert_eq_m256(r, e);
46173    }
46174
46175    #[simd_test(enable = "avx512f,avx512vl")]
46176    unsafe fn test_mm256_maskz_cvtepi32_ps() {
46177        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46178        let r = _mm256_maskz_cvtepi32_ps(0, a);
46179        assert_eq_m256(r, _mm256_setzero_ps());
46180        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
46181        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
46182        assert_eq_m256(r, e);
46183    }
46184
46185    #[simd_test(enable = "avx512f,avx512vl")]
46186    unsafe fn test_mm_mask_cvtepi32_ps() {
46187        let a = _mm_set_epi32(1, 2, 3, 4);
46188        let src = _mm_set1_ps(-1.);
46189        let r = _mm_mask_cvtepi32_ps(src, 0, a);
46190        assert_eq_m128(r, src);
46191        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
46192        let e = _mm_set_ps(1., 2., 3., 4.);
46193        assert_eq_m128(r, e);
46194    }
46195
46196    #[simd_test(enable = "avx512f,avx512vl")]
46197    unsafe fn test_mm_maskz_cvtepi32_ps() {
46198        let a = _mm_set_epi32(1, 2, 3, 4);
46199        let r = _mm_maskz_cvtepi32_ps(0, a);
46200        assert_eq_m128(r, _mm_setzero_ps());
46201        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
46202        let e = _mm_set_ps(1., 2., 3., 4.);
46203        assert_eq_m128(r, e);
46204    }
46205
46206    #[simd_test(enable = "avx512f")]
46207    unsafe fn test_mm512_cvtepu32_ps() {
46208        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46209        let r = _mm512_cvtepu32_ps(a);
46210        let e = _mm512_set_ps(
46211            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
46212        );
46213        assert_eq_m512(r, e);
46214    }
46215
46216    #[simd_test(enable = "avx512f")]
46217    unsafe fn test_mm512_mask_cvtepu32_ps() {
46218        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46219        let src = _mm512_set1_ps(-1.);
46220        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
46221        assert_eq_m512(r, src);
46222        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
46223        let e = _mm512_set_ps(
46224            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
46225        );
46226        assert_eq_m512(r, e);
46227    }
46228
46229    #[simd_test(enable = "avx512f")]
46230    unsafe fn test_mm512_maskz_cvtepu32_ps() {
46231        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46232        let r = _mm512_maskz_cvtepu32_ps(0, a);
46233        assert_eq_m512(r, _mm512_setzero_ps());
46234        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
46235        let e = _mm512_set_ps(
46236            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
46237        );
46238        assert_eq_m512(r, e);
46239    }
46240
46241    #[simd_test(enable = "avx512f")]
46242    unsafe fn test_mm512_cvtepi32_epi16() {
46243        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46244        let r = _mm512_cvtepi32_epi16(a);
46245        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46246        assert_eq_m256i(r, e);
46247    }
46248
46249    #[simd_test(enable = "avx512f")]
46250    unsafe fn test_mm512_mask_cvtepi32_epi16() {
46251        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46252        let src = _mm256_set1_epi16(-1);
46253        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
46254        assert_eq_m256i(r, src);
46255        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
46256        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46257        assert_eq_m256i(r, e);
46258    }
46259
46260    #[simd_test(enable = "avx512f")]
46261    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
46262        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46263        let r = _mm512_maskz_cvtepi32_epi16(0, a);
46264        assert_eq_m256i(r, _mm256_setzero_si256());
46265        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
46266        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46267        assert_eq_m256i(r, e);
46268    }
46269
46270    #[simd_test(enable = "avx512f,avx512vl")]
46271    unsafe fn test_mm256_cvtepi32_epi16() {
46272        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46273        let r = _mm256_cvtepi32_epi16(a);
46274        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46275        assert_eq_m128i(r, e);
46276    }
46277
46278    #[simd_test(enable = "avx512f,avx512vl")]
46279    unsafe fn test_mm256_mask_cvtepi32_epi16() {
46280        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46281        let src = _mm_set1_epi16(-1);
46282        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
46283        assert_eq_m128i(r, src);
46284        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
46285        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46286        assert_eq_m128i(r, e);
46287    }
46288
46289    #[simd_test(enable = "avx512f,avx512vl")]
46290    unsafe fn test_mm256_maskz_cvtepi32_epi16() {
46291        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46292        let r = _mm256_maskz_cvtepi32_epi16(0, a);
46293        assert_eq_m128i(r, _mm_setzero_si128());
46294        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
46295        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46296        assert_eq_m128i(r, e);
46297    }
46298
46299    #[simd_test(enable = "avx512f,avx512vl")]
46300    unsafe fn test_mm_cvtepi32_epi16() {
46301        let a = _mm_set_epi32(4, 5, 6, 7);
46302        let r = _mm_cvtepi32_epi16(a);
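        // The 128-bit vpmovdw narrows only 4 lanes; the upper 64 bits of the result are zeroed.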
46303        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46304        assert_eq_m128i(r, e);
46305    }
46306
46307    #[simd_test(enable = "avx512f,avx512vl")]
46308    unsafe fn test_mm_mask_cvtepi32_epi16() {
46309        let a = _mm_set_epi32(4, 5, 6, 7);
46310        let src = _mm_set1_epi16(0);
46311        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
46312        assert_eq_m128i(r, src);
46313        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
46314        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46315        assert_eq_m128i(r, e);
46316    }
46317
46318    #[simd_test(enable = "avx512f,avx512vl")]
46319    unsafe fn test_mm_maskz_cvtepi32_epi16() {
46320        let a = _mm_set_epi32(4, 5, 6, 7);
46321        let r = _mm_maskz_cvtepi32_epi16(0, a);
46322        assert_eq_m128i(r, _mm_setzero_si128());
46323        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
46324        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46325        assert_eq_m128i(r, e);
46326    }
46327
46328    #[simd_test(enable = "avx512f")]
46329    unsafe fn test_mm512_cvtepi32_epi8() {
46330        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46331        let r = _mm512_cvtepi32_epi8(a);
46332        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46333        assert_eq_m128i(r, e);
46334    }
46335
46336    #[simd_test(enable = "avx512f")]
46337    unsafe fn test_mm512_mask_cvtepi32_epi8() {
46338        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46339        let src = _mm_set1_epi8(-1);
46340        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
46341        assert_eq_m128i(r, src);
46342        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
46343        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46344        assert_eq_m128i(r, e);
46345    }
46346
46347    #[simd_test(enable = "avx512f")]
46348    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
46349        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
46350        let r = _mm512_maskz_cvtepi32_epi8(0, a);
46351        assert_eq_m128i(r, _mm_setzero_si128());
46352        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
46353        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
46354        assert_eq_m128i(r, e);
46355    }
46356
46357    #[simd_test(enable = "avx512f,avx512vl")]
46358    unsafe fn test_mm256_cvtepi32_epi8() {
46359        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46360        let r = _mm256_cvtepi32_epi8(a);
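        // Narrowing 8 x i32 produces only 8 bytes; the upper 8 bytes of the 128-bit result are zero.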
46361        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
46362        assert_eq_m128i(r, e);
46363    }
46364
46365    #[simd_test(enable = "avx512f,avx512vl")]
46366    unsafe fn test_mm256_mask_cvtepi32_epi8() {
46367        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46368        let src = _mm_set1_epi8(0);
46369        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
46370        assert_eq_m128i(r, src);
46371        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
46372        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
46373        assert_eq_m128i(r, e);
46374    }
46375
46376    #[simd_test(enable = "avx512f,avx512vl")]
46377    unsafe fn test_mm256_maskz_cvtepi32_epi8() {
46378        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46379        let r = _mm256_maskz_cvtepi32_epi8(0, a);
46380        assert_eq_m128i(r, _mm_setzero_si128());
46381        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
46382        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
46383        assert_eq_m128i(r, e);
46384    }
46385
46386    #[simd_test(enable = "avx512f,avx512vl")]
46387    unsafe fn test_mm_cvtepi32_epi8() {
46388        let a = _mm_set_epi32(4, 5, 6, 7);
46389        let r = _mm_cvtepi32_epi8(a);
46390        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
46391        assert_eq_m128i(r, e);
46392    }
46393
46394    #[simd_test(enable = "avx512f,avx512vl")]
46395    unsafe fn test_mm_mask_cvtepi32_epi8() {
46396        let a = _mm_set_epi32(4, 5, 6, 7);
46397        let src = _mm_set1_epi8(0);
46398        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
46399        assert_eq_m128i(r, src);
46400        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
46401        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
46402        assert_eq_m128i(r, e);
46403    }
46404
46405    #[simd_test(enable = "avx512f,avx512vl")]
46406    unsafe fn test_mm_maskz_cvtepi32_epi8() {
46407        let a = _mm_set_epi32(4, 5, 6, 7);
46408        let r = _mm_maskz_cvtepi32_epi8(0, a);
46409        assert_eq_m128i(r, _mm_setzero_si128());
46410        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
46411        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
46412        assert_eq_m128i(r, e);
46413    }
46414
46415    #[simd_test(enable = "avx512f")]
46416    unsafe fn test_mm512_cvtsepi32_epi16() {
46417        #[rustfmt::skip]
46418        let a = _mm512_set_epi32(
46419            0, 1, 2, 3,
46420            4, 5, 6, 7,
46421            8, 9, 10, 11,
46422            12, 13, i32::MIN, i32::MAX,
46423        );
46424        let r = _mm512_cvtsepi32_epi16(a);
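        // cvtsepi32_epi16 narrows with signed saturation: i32::MIN and i32::MAX clamp to
        // i16::MIN and i16::MAX instead of being truncated.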
46425        #[rustfmt::skip]
46426        let e = _mm256_set_epi16(
46427            0, 1, 2, 3,
46428            4, 5, 6, 7,
46429            8, 9, 10, 11,
46430            12, 13, i16::MIN, i16::MAX,
46431        );
46432        assert_eq_m256i(r, e);
46433    }
46434
46435    #[simd_test(enable = "avx512f")]
46436    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
46437        #[rustfmt::skip]
46438        let a = _mm512_set_epi32(
46439            0, 1, 2, 3,
46440            4, 5, 6, 7,
46441            8, 9, 10, 11,
46442            12, 13, i32::MIN, i32::MAX,
46443        );
46444        let src = _mm256_set1_epi16(-1);
46445        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
46446        assert_eq_m256i(r, src);
46447        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
46448        #[rustfmt::skip]
46449        let e = _mm256_set_epi16(
46450            -1, -1, -1, -1,
46451            -1, -1, -1, -1,
46452            8, 9, 10, 11,
46453            12, 13, i16::MIN, i16::MAX,
46454        );
46455        assert_eq_m256i(r, e);
46456    }
46457
46458    #[simd_test(enable = "avx512f")]
46459    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
46460        #[rustfmt::skip]
46461        let a = _mm512_set_epi32(
46462            0, 1, 2, 3,
46463            4, 5, 6, 7,
46464            8, 9, 10, 11,
46465            12, 13, i32::MIN, i32::MAX,
46466        );
46467        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
46468        assert_eq_m256i(r, _mm256_setzero_si256());
46469        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
46470        #[rustfmt::skip]
46471        let e = _mm256_set_epi16(
46472            0, 0, 0, 0,
46473            0, 0, 0, 0,
46474            8, 9, 10, 11,
46475            12, 13, i16::MIN, i16::MAX,
46476        );
46477        assert_eq_m256i(r, e);
46478    }
46479
46480    #[simd_test(enable = "avx512f,avx512vl")]
46481    unsafe fn test_mm256_cvtsepi32_epi16() {
46482        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46483        let r = _mm256_cvtsepi32_epi16(a);
46484        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46485        assert_eq_m128i(r, e);
46486    }
46487
46488    #[simd_test(enable = "avx512f,avx512vl")]
46489    unsafe fn test_mm256_mask_cvtsepi32_epi16() {
46490        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46491        let src = _mm_set1_epi16(-1);
46492        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
46493        assert_eq_m128i(r, src);
46494        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
46495        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46496        assert_eq_m128i(r, e);
46497    }
46498
46499    #[simd_test(enable = "avx512f,avx512vl")]
46500    unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
46501        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
46502        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
46503        assert_eq_m128i(r, _mm_setzero_si128());
46504        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
46505        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
46506        assert_eq_m128i(r, e);
46507    }
46508
46509    #[simd_test(enable = "avx512f,avx512vl")]
46510    unsafe fn test_mm_cvtsepi32_epi16() {
46511        let a = _mm_set_epi32(4, 5, 6, 7);
46512        let r = _mm_cvtsepi32_epi16(a);
46513        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46514        assert_eq_m128i(r, e);
46515    }
46516
46517    #[simd_test(enable = "avx512f,avx512vl")]
46518    unsafe fn test_mm_mask_cvtsepi32_epi16() {
46519        let a = _mm_set_epi32(4, 5, 6, 7);
46520        let src = _mm_set1_epi16(0);
46521        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
46522        assert_eq_m128i(r, src);
46523        let r = _mm_mask_cvtsepi32_epi16(src, 0b00001111, a); // only the low 4 mask bits select lanes
46524        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46525        assert_eq_m128i(r, e);
46526    }
46527
46528    #[simd_test(enable = "avx512f,avx512vl")]
46529    unsafe fn test_mm_maskz_cvtsepi32_epi16() {
46530        let a = _mm_set_epi32(4, 5, 6, 7);
46531        let r = _mm_maskz_cvtsepi32_epi16(0, a);
46532        assert_eq_m128i(r, _mm_setzero_si128());
46533        let r = _mm_maskz_cvtsepi32_epi16(0b00001111, a); // only the low 4 mask bits select lanes
46534        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
46535        assert_eq_m128i(r, e);
46536    }
46537
46538    #[simd_test(enable = "avx512f")]
46539    unsafe fn test_mm512_cvtsepi32_epi8() {
46540        #[rustfmt::skip]
46541        let a = _mm512_set_epi32(
46542            0, 1, 2, 3,
46543            4, 5, 6, 7,
46544            8, 9, 10, 11,
46545            12, 13, i32::MIN, i32::MAX,
46546        );
46547        let r = _mm512_cvtsepi32_epi8(a);
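        // Signed saturation again, this time clamping to the i8 range.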
46548        #[rustfmt::skip]
46549        let e = _mm_set_epi8(
46550            0, 1, 2, 3,
46551            4, 5, 6, 7,
46552            8, 9, 10, 11,
46553            12, 13, i8::MIN, i8::MAX,
46554        );
46555        assert_eq_m128i(r, e);
46556    }
46557
46558    #[simd_test(enable = "avx512f")]
46559    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
46560        #[rustfmt::skip]
46561        let a = _mm512_set_epi32(
46562            0, 1, 2, 3,
46563            4, 5, 6, 7,
46564            8, 9, 10, 11,
46565            12, 13, i32::MIN, i32::MAX,
46566        );
46567        let src = _mm_set1_epi8(-1);
46568        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
46569        assert_eq_m128i(r, src);
46570        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
46571        #[rustfmt::skip]
46572        let e = _mm_set_epi8(
46573            -1, -1, -1, -1,
46574            -1, -1, -1, -1,
46575            8, 9, 10, 11,
46576            12, 13, i8::MIN, i8::MAX,
46577        );
46578        assert_eq_m128i(r, e);
46579    }
46580
46581    #[simd_test(enable = "avx512f")]
46582    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
46583        #[rustfmt::skip]
46584        let a = _mm512_set_epi32(
46585            0, 1, 2, 3,
46586            4, 5, 6, 7,
46587            8, 9, 10, 11,
46588            12, 13, i32::MIN, i32::MAX,
46589        );
46590        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
46591        assert_eq_m128i(r, _mm_setzero_si128());
46592        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
46593        #[rustfmt::skip]
46594        let e = _mm_set_epi8(
46595            0, 0, 0, 0,
46596            0, 0, 0, 0,
46597            8, 9, 10, 11,
46598            12, 13, i8::MIN, i8::MAX,
46599        );
46600        assert_eq_m128i(r, e);
46601    }
46602
46603    #[simd_test(enable = "avx512f,avx512vl")]
46604    unsafe fn test_mm256_cvtsepi32_epi8() {
46605        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
46606        let r = _mm256_cvtsepi32_epi8(a);
46607        #[rustfmt::skip]
46608        let e = _mm_set_epi8(
46609            0, 0, 0, 0,
46610            0, 0, 0, 0,
46611            9, 10, 11, 12,
46612            13, 14, 15, 16,
46613        );
46614        assert_eq_m128i(r, e);
46615    }
46616
46617    #[simd_test(enable = "avx512f,avx512vl")]
46618    unsafe fn test_mm256_mask_cvtsepi32_epi8() {
46619        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
46620        let src = _mm_set1_epi8(0);
46621        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
46622        assert_eq_m128i(r, src);
46623        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
46624        #[rustfmt::skip]
46625        let e = _mm_set_epi8(
46626            0, 0, 0, 0,
46627            0, 0, 0, 0,
46628            9, 10, 11, 12,
46629            13, 14, 15, 16,
46630        );
46631        assert_eq_m128i(r, e);
46632    }
46633
46634    #[simd_test(enable = "avx512f,avx512vl")]
46635    unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
46636        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
46637        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
46638        assert_eq_m128i(r, _mm_setzero_si128());
46639        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
46640        #[rustfmt::skip]
46641        let e = _mm_set_epi8(
46642            0, 0, 0, 0,
46643            0, 0, 0, 0,
46644            9, 10, 11, 12,
46645            13, 14, 15, 16,
46646        );
46647        assert_eq_m128i(r, e);
46648    }
46649
46650    #[simd_test(enable = "avx512f,avx512vl")]
46651    unsafe fn test_mm_cvtsepi32_epi8() {
46652        let a = _mm_set_epi32(13, 14, 15, 16);
46653        let r = _mm_cvtsepi32_epi8(a);
46654        #[rustfmt::skip]
46655        let e = _mm_set_epi8(
46656            0, 0, 0, 0,
46657            0, 0, 0, 0,
46658            0, 0, 0, 0,
46659            13, 14, 15, 16,
46660        );
46661        assert_eq_m128i(r, e);
46662    }
46663
46664    #[simd_test(enable = "avx512f,avx512vl")]
46665    unsafe fn test_mm_mask_cvtsepi32_epi8() {
46666        let a = _mm_set_epi32(13, 14, 15, 16);
46667        let src = _mm_set1_epi8(0);
46668        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
46669        assert_eq_m128i(r, src);
46670        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
46671        #[rustfmt::skip]
46672        let e = _mm_set_epi8(
46673            0, 0, 0, 0,
46674            0, 0, 0, 0,
46675            0, 0, 0, 0,
46676            13, 14, 15, 16,
46677        );
46678        assert_eq_m128i(r, e);
46679    }
46680
46681    #[simd_test(enable = "avx512f,avx512vl")]
46682    unsafe fn test_mm_maskz_cvtsepi32_epi8() {
46683        let a = _mm_set_epi32(13, 14, 15, 16);
46684        let r = _mm_maskz_cvtsepi32_epi8(0, a);
46685        assert_eq_m128i(r, _mm_setzero_si128());
46686        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
46687        #[rustfmt::skip]
46688        let e = _mm_set_epi8(
46689            0, 0, 0, 0,
46690            0, 0, 0, 0,
46691            0, 0, 0, 0,
46692            13, 14, 15, 16,
46693        );
46694        assert_eq_m128i(r, e);
46695    }
46696
46697    #[simd_test(enable = "avx512f")]
46698    unsafe fn test_mm512_cvtusepi32_epi16() {
46699        #[rustfmt::skip]
46700        let a = _mm512_set_epi32(
46701            0, 1, 2, 3,
46702            4, 5, 6, 7,
46703            8, 9, 10, 11,
46704            12, 13, i32::MIN, i32::MIN,
46705        );
46706        let r = _mm512_cvtusepi32_epi16(a);
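        // Unsigned saturation treats i32::MIN as the large unsigned value 0x8000_0000, which
        // clamps to u16::MAX; reinterpreted as i16 that is -1, hence the trailing -1 lanes.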
46707        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
46708        assert_eq_m256i(r, e);
46709    }
46710
46711    #[simd_test(enable = "avx512f")]
46712    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
46713        #[rustfmt::skip]
46714        let a = _mm512_set_epi32(
46715            0, 1, 2, 3,
46716            4, 5, 6, 7,
46717            8, 9, 10, 11,
46718            12, 13, i32::MIN, i32::MIN,
46719        );
46720        let src = _mm256_set1_epi16(-1);
46721        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
46722        assert_eq_m256i(r, src);
46723        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
46724        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
46725        assert_eq_m256i(r, e);
46726    }
46727
46728    #[simd_test(enable = "avx512f")]
46729    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
46730        #[rustfmt::skip]
46731        let a = _mm512_set_epi32(
46732            0, 1, 2, 3,
46733            4, 5, 6, 7,
46734            8, 9, 10, 11,
46735            12, 13, i32::MIN, i32::MIN,
46736        );
46737        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
46738        assert_eq_m256i(r, _mm256_setzero_si256());
46739        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
46740        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
46741        assert_eq_m256i(r, e);
46742    }
46743
46744    #[simd_test(enable = "avx512f,avx512vl")]
46745    unsafe fn test_mm256_cvtusepi32_epi16() {
46746        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46747        let r = _mm256_cvtusepi32_epi16(a);
46748        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
46749        assert_eq_m128i(r, e);
46750    }
46751
46752    #[simd_test(enable = "avx512f,avx512vl")]
46753    unsafe fn test_mm256_mask_cvtusepi32_epi16() {
46754        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46755        let src = _mm_set1_epi16(0);
46756        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
46757        assert_eq_m128i(r, src);
46758        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
46759        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
46760        assert_eq_m128i(r, e);
46761    }
46762
46763    #[simd_test(enable = "avx512f,avx512vl")]
46764    unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
46765        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
46766        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
46767        assert_eq_m128i(r, _mm_setzero_si128());
46768        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
46769        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
46770        assert_eq_m128i(r, e);
46771    }
46772
46773    #[simd_test(enable = "avx512f,avx512vl")]
46774    unsafe fn test_mm_cvtusepi32_epi16() {
46775        let a = _mm_set_epi32(5, 6, 7, 8);
46776        let r = _mm_cvtusepi32_epi16(a);
46777        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
46778        assert_eq_m128i(r, e);
46779    }
46780
46781    #[simd_test(enable = "avx512f,avx512vl")]
46782    unsafe fn test_mm_mask_cvtusepi32_epi16() {
46783        let a = _mm_set_epi32(5, 6, 7, 8);
46784        let src = _mm_set1_epi16(0);
46785        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
46786        assert_eq_m128i(r, src);
46787        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
46788        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
46789        assert_eq_m128i(r, e);
46790    }
46791
46792    #[simd_test(enable = "avx512f,avx512vl")]
46793    unsafe fn test_mm_maskz_cvtusepi32_epi16() {
46794        let a = _mm_set_epi32(5, 6, 7, 8);
46795        let r = _mm_maskz_cvtusepi32_epi16(0, a);
46796        assert_eq_m128i(r, _mm_setzero_si128());
46797        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
46798        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
46799        assert_eq_m128i(r, e);
46800    }
46801
46802    #[simd_test(enable = "avx512f")]
46803    unsafe fn test_mm512_cvtusepi32_epi8() {
46804        #[rustfmt::skip]
46805        let a = _mm512_set_epi32(
46806            0, 1, 2, 3,
46807            4, 5, 6, 7,
46808            8, 9, 10, 11,
46809            12, 13, i32::MIN, i32::MIN,
46810        );
46811        let r = _mm512_cvtusepi32_epi8(a);
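        // As above, but clamping to u8::MAX (-1 when reinterpreted as i8).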
46812        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
46813        assert_eq_m128i(r, e);
46814    }
46815
46816    #[simd_test(enable = "avx512f")]
46817    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
46818        #[rustfmt::skip]
46819        let a = _mm512_set_epi32(
46820            0, 1, 2, 3,
46821            4, 5, 6, 7,
46822            8, 9, 10, 11,
46823            12, 13, i32::MIN, i32::MIN,
46824        );
46825        let src = _mm_set1_epi8(-1);
46826        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
46827        assert_eq_m128i(r, src);
46828        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
46829        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
46830        assert_eq_m128i(r, e);
46831    }
46832
46833    #[simd_test(enable = "avx512f")]
46834    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
46835        #[rustfmt::skip]
46836        let a = _mm512_set_epi32(
46837            0, 1, 2, 3,
46838            4, 5, 6, 7,
46839            8, 9, 10, 11,
46840            12, 13, i32::MIN, i32::MIN,
46841        );
46842        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
46843        assert_eq_m128i(r, _mm_setzero_si128());
46844        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
46845        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
46846        assert_eq_m128i(r, e);
46847    }
46848
46849    #[simd_test(enable = "avx512f,avx512vl")]
46850    unsafe fn test_mm256_cvtusepi32_epi8() {
46851        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
46852        let r = _mm256_cvtusepi32_epi8(a);
46853        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
46854        assert_eq_m128i(r, e);
46855    }
46856
46857    #[simd_test(enable = "avx512f,avx512vl")]
46858    unsafe fn test_mm256_mask_cvtusepi32_epi8() {
46859        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
46860        let src = _mm_set1_epi8(0);
46861        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
46862        assert_eq_m128i(r, src);
46863        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
46864        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
46865        assert_eq_m128i(r, e);
46866    }
46867
46868    #[simd_test(enable = "avx512f,avx512vl")]
46869    unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
46870        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
46871        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
46872        assert_eq_m128i(r, _mm_setzero_si128());
46873        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
46874        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
46875        assert_eq_m128i(r, e);
46876    }
46877
46878    #[simd_test(enable = "avx512f,avx512vl")]
46879    unsafe fn test_mm_cvtusepi32_epi8() {
46880        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
46881        let r = _mm_cvtusepi32_epi8(a);
46882        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
46883        assert_eq_m128i(r, e);
46884    }
46885
46886    #[simd_test(enable = "avx512f,avx512vl")]
46887    unsafe fn test_mm_mask_cvtusepi32_epi8() {
46888        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
46889        let src = _mm_set1_epi8(0);
46890        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
46891        assert_eq_m128i(r, src);
46892        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
46893        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
46894        assert_eq_m128i(r, e);
46895    }
46896
46897    #[simd_test(enable = "avx512f,avx512vl")]
46898    unsafe fn test_mm_maskz_cvtusepi32_epi8() {
46899        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
46900        let r = _mm_maskz_cvtusepi32_epi8(0, a);
46901        assert_eq_m128i(r, _mm_setzero_si128());
46902        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
46903        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
46904        assert_eq_m128i(r, e);
46905    }
46906
46907    #[simd_test(enable = "avx512f")]
46908    unsafe fn test_mm512_cvt_roundps_epi32() {
46909        let a = _mm512_setr_ps(
46910            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
46911        );
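        // Two explicit rounding modes are exercised: round to nearest (ties to even), where
        // 9.5 -> 10, and round toward negative infinity, where 9.5 -> 9.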
46912        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46913        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
46914        assert_eq_m512i(r, e);
46915        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
46916        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
46917        assert_eq_m512i(r, e);
46918    }
46919
46920    #[simd_test(enable = "avx512f")]
46921    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
46922        let a = _mm512_setr_ps(
46923            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
46924        );
46925        let src = _mm512_set1_epi32(0);
46926        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46927            src, 0, a,
46928        );
46929        assert_eq_m512i(r, src);
46930        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46931            src,
46932            0b00000000_11111111,
46933            a,
46934        );
46935        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
46936        assert_eq_m512i(r, e);
46937    }
46938
46939    #[simd_test(enable = "avx512f")]
46940    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
46941        let a = _mm512_setr_ps(
46942            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
46943        );
46944        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46945            0, a,
46946        );
46947        assert_eq_m512i(r, _mm512_setzero_si512());
46948        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46949            0b00000000_11111111,
46950            a,
46951        );
46952        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
46953        assert_eq_m512i(r, e);
46954    }
46955
46956    #[simd_test(enable = "avx512f")]
46957    unsafe fn test_mm512_cvt_roundps_epu32() {
46958        let a = _mm512_setr_ps(
46959            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
46960        );
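        // Negative inputs are out of range for the unsigned conversion and produce the all-ones
        // pattern (-1); the in-range lanes follow the selected rounding mode.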
46961        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46962        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
46963        assert_eq_m512i(r, e);
46964        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
46965        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
46966        assert_eq_m512i(r, e);
46967    }
46968
46969    #[simd_test(enable = "avx512f")]
46970    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
46971        let a = _mm512_setr_ps(
46972            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
46973        );
46974        let src = _mm512_set1_epi32(0);
46975        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46976            src, 0, a,
46977        );
46978        assert_eq_m512i(r, src);
46979        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46980            src,
46981            0b00000000_11111111,
46982            a,
46983        );
46984        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
46985        assert_eq_m512i(r, e);
46986    }
46987
46988    #[simd_test(enable = "avx512f")]
46989    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
46990        let a = _mm512_setr_ps(
46991            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
46992        );
46993        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46994            0, a,
46995        );
46996        assert_eq_m512i(r, _mm512_setzero_si512());
46997        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46998            0b00000000_11111111,
46999            a,
47000        );
47001        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47002        assert_eq_m512i(r, e);
47003    }
47004
47005    #[simd_test(enable = "avx512f")]
47006    unsafe fn test_mm512_cvt_roundepi32_ps() {
47007        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47008        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
47009        let e = _mm512_setr_ps(
47010            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
47011        );
47012        assert_eq_m512(r, e);
47013    }
47014
47015    #[simd_test(enable = "avx512f")]
47016    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
47017        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47018        let src = _mm512_set1_ps(0.);
47019        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47020            src, 0, a,
47021        );
47022        assert_eq_m512(r, src);
47023        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47024            src,
47025            0b00000000_11111111,
47026            a,
47027        );
47028        let e = _mm512_setr_ps(
47029            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
47030        );
47031        assert_eq_m512(r, e);
47032    }
47033
47034    #[simd_test(enable = "avx512f")]
47035    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
47036        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47037        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47038            0, a,
47039        );
47040        assert_eq_m512(r, _mm512_setzero_ps());
47041        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47042            0b00000000_11111111,
47043            a,
47044        );
47045        let e = _mm512_setr_ps(
47046            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
47047        );
47048        assert_eq_m512(r, e);
47049    }
47050
47051    #[simd_test(enable = "avx512f")]
47052    unsafe fn test_mm512_cvt_roundepu32_ps() {
47053        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47054        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
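        // -2 as an unsigned 32-bit value is 4_294_967_294; the nearest f32 is
        // 4_294_967_296.0, and the literal 4294967300. denotes that same f32.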
47055        #[rustfmt::skip]
47056        let e = _mm512_setr_ps(
47057            0., 4294967300., 2., 4294967300.,
47058            4., 4294967300., 6., 4294967300.,
47059            8., 10., 10., 12.,
47060            12., 14., 14., 16.,
47061        );
47062        assert_eq_m512(r, e);
47063    }
47064
47065    #[simd_test(enable = "avx512f")]
47066    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
47067        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47068        let src = _mm512_set1_ps(0.);
47069        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47070            src, 0, a,
47071        );
47072        assert_eq_m512(r, src);
47073        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47074            src,
47075            0b00000000_11111111,
47076            a,
47077        );
47078        #[rustfmt::skip]
47079        let e = _mm512_setr_ps(
47080            0., 4294967300., 2., 4294967300.,
47081            4., 4294967300., 6., 4294967300.,
47082            0., 0., 0., 0.,
47083            0., 0., 0., 0.,
47084        );
47085        assert_eq_m512(r, e);
47086    }
47087
47088    #[simd_test(enable = "avx512f")]
47089    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
47090        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47091        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47092            0, a,
47093        );
47094        assert_eq_m512(r, _mm512_setzero_ps());
47095        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47096            0b00000000_11111111,
47097            a,
47098        );
47099        #[rustfmt::skip]
47100        let e = _mm512_setr_ps(
47101            0., 4294967300., 2., 4294967300.,
47102            4., 4294967300., 6., 4294967300.,
47103            0., 0., 0., 0.,
47104            0., 0., 0., 0.,
47105        );
47106        assert_eq_m512(r, e);
47107    }
47108
47109    #[simd_test(enable = "avx512f")]
47110    unsafe fn test_mm512_cvt_roundps_ph() {
47111        let a = _mm512_set1_ps(1.);
47112        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
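        // 4323521613979991040 == 0x3C00_3C00_3C00_3C00: four packed f16 1.0
        // values in each 64-bit lane.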
47113        let e = _mm256_setr_epi64x(
47114            4323521613979991040,
47115            4323521613979991040,
47116            4323521613979991040,
47117            4323521613979991040,
47118        );
47119        assert_eq_m256i(r, e);
47120    }
47121
47122    #[simd_test(enable = "avx512f")]
47123    unsafe fn test_mm512_mask_cvt_roundps_ph() {
47124        let a = _mm512_set1_ps(1.);
47125        let src = _mm256_set1_epi16(0);
47126        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47127        assert_eq_m256i(r, src);
47128        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47129        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47130        assert_eq_m256i(r, e);
47131    }
47132
47133    #[simd_test(enable = "avx512f")]
47134    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
47135        let a = _mm512_set1_ps(1.);
47136        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
47137        assert_eq_m256i(r, _mm256_setzero_si256());
47138        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47139        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47140        assert_eq_m256i(r, e);
47141    }
47142
47143    #[simd_test(enable = "avx512f,avx512vl")]
47144    unsafe fn test_mm256_mask_cvt_roundps_ph() {
47145        let a = _mm256_set1_ps(1.);
47146        let src = _mm_set1_epi16(0);
47147        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47148        assert_eq_m128i(r, src);
47149        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
47150        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47151        assert_eq_m128i(r, e);
47152    }
47153
47154    #[simd_test(enable = "avx512f,avx512vl")]
47155    unsafe fn test_mm256_maskz_cvt_roundps_ph() {
47156        let a = _mm256_set1_ps(1.);
47157        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
47158        assert_eq_m128i(r, _mm_setzero_si128());
47159        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
47160        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47161        assert_eq_m128i(r, e);
47162    }
47163
47164    #[simd_test(enable = "avx512f,avx512vl")]
47165    unsafe fn test_mm_mask_cvt_roundps_ph() {
47166        let a = _mm_set1_ps(1.);
47167        let src = _mm_set1_epi16(0);
47168        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47169        assert_eq_m128i(r, src);
47170        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
47171        let e = _mm_setr_epi64x(4323521613979991040, 0);
47172        assert_eq_m128i(r, e);
47173    }
47174
47175    #[simd_test(enable = "avx512f,avx512vl")]
47176    unsafe fn test_mm_maskz_cvt_roundps_ph() {
47177        let a = _mm_set1_ps(1.);
47178        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
47179        assert_eq_m128i(r, _mm_setzero_si128());
47180        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
47181        let e = _mm_setr_epi64x(4323521613979991040, 0);
47182        assert_eq_m128i(r, e);
47183    }
47184
47185    #[simd_test(enable = "avx512f")]
47186    unsafe fn test_mm512_cvtps_ph() {
47187        let a = _mm512_set1_ps(1.);
47188        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
47189        let e = _mm256_setr_epi64x(
47190            4323521613979991040,
47191            4323521613979991040,
47192            4323521613979991040,
47193            4323521613979991040,
47194        );
47195        assert_eq_m256i(r, e);
47196    }
47197
47198    #[simd_test(enable = "avx512f")]
47199    unsafe fn test_mm512_mask_cvtps_ph() {
47200        let a = _mm512_set1_ps(1.);
47201        let src = _mm256_set1_epi16(0);
47202        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47203        assert_eq_m256i(r, src);
47204        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47205        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47206        assert_eq_m256i(r, e);
47207    }
47208
47209    #[simd_test(enable = "avx512f")]
47210    unsafe fn test_mm512_maskz_cvtps_ph() {
47211        let a = _mm512_set1_ps(1.);
47212        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
47213        assert_eq_m256i(r, _mm256_setzero_si256());
47214        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47215        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
47216        assert_eq_m256i(r, e);
47217    }
47218
47219    #[simd_test(enable = "avx512f,avx512vl")]
47220    unsafe fn test_mm256_mask_cvtps_ph() {
47221        let a = _mm256_set1_ps(1.);
47222        let src = _mm_set1_epi16(0);
47223        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47224        assert_eq_m128i(r, src);
47225        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
47226        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47227        assert_eq_m128i(r, e);
47228    }
47229
47230    #[simd_test(enable = "avx512f,avx512vl")]
47231    unsafe fn test_mm256_maskz_cvtps_ph() {
47232        let a = _mm256_set1_ps(1.);
47233        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
47234        assert_eq_m128i(r, _mm_setzero_si128());
47235        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
47236        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47237        assert_eq_m128i(r, e);
47238    }
47239
47240    #[simd_test(enable = "avx512f,avx512vl")]
47241    unsafe fn test_mm_mask_cvtps_ph() {
47242        let a = _mm_set1_ps(1.);
47243        let src = _mm_set1_epi16(0);
47244        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
47245        assert_eq_m128i(r, src);
47246        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
47247        let e = _mm_setr_epi64x(4323521613979991040, 0);
47248        assert_eq_m128i(r, e);
47249    }
47250
47251    #[simd_test(enable = "avx512f,avx512vl")]
47252    unsafe fn test_mm_maskz_cvtps_ph() {
47253        let a = _mm_set1_ps(1.);
47254        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
47255        assert_eq_m128i(r, _mm_setzero_si128());
47256        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
47257        let e = _mm_setr_epi64x(4323521613979991040, 0);
47258        assert_eq_m128i(r, e);
47259    }
47260
47261    #[simd_test(enable = "avx512f")]
47262    unsafe fn test_mm512_cvt_roundph_ps() {
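        // Each 16-bit lane of `a` holds 0x3C00, the f16 encoding of 1.0.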
47263        let a = _mm256_setr_epi64x(
47264            4323521613979991040,
47265            4323521613979991040,
47266            4323521613979991040,
47267            4323521613979991040,
47268        );
47269        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
47270        let e = _mm512_set1_ps(1.);
47271        assert_eq_m512(r, e);
47272    }
47273
47274    #[simd_test(enable = "avx512f")]
47275    unsafe fn test_mm512_mask_cvt_roundph_ps() {
47276        let a = _mm256_setr_epi64x(
47277            4323521613979991040,
47278            4323521613979991040,
47279            4323521613979991040,
47280            4323521613979991040,
47281        );
47282        let src = _mm512_set1_ps(0.);
47283        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
47284        assert_eq_m512(r, src);
47285        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47286        let e = _mm512_setr_ps(
47287            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47288        );
47289        assert_eq_m512(r, e);
47290    }
47291
47292    #[simd_test(enable = "avx512f")]
47293    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
47294        let a = _mm256_setr_epi64x(
47295            4323521613979991040,
47296            4323521613979991040,
47297            4323521613979991040,
47298            4323521613979991040,
47299        );
47300        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
47301        assert_eq_m512(r, _mm512_setzero_ps());
47302        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47303        let e = _mm512_setr_ps(
47304            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47305        );
47306        assert_eq_m512(r, e);
47307    }
47308
47309    #[simd_test(enable = "avx512f")]
47310    unsafe fn test_mm512_cvtph_ps() {
47311        let a = _mm256_setr_epi64x(
47312            4323521613979991040,
47313            4323521613979991040,
47314            4323521613979991040,
47315            4323521613979991040,
47316        );
47317        let r = _mm512_cvtph_ps(a);
47318        let e = _mm512_set1_ps(1.);
47319        assert_eq_m512(r, e);
47320    }
47321
47322    #[simd_test(enable = "avx512f")]
47323    unsafe fn test_mm512_mask_cvtph_ps() {
47324        let a = _mm256_setr_epi64x(
47325            4323521613979991040,
47326            4323521613979991040,
47327            4323521613979991040,
47328            4323521613979991040,
47329        );
47330        let src = _mm512_set1_ps(0.);
47331        let r = _mm512_mask_cvtph_ps(src, 0, a);
47332        assert_eq_m512(r, src);
47333        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
47334        let e = _mm512_setr_ps(
47335            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47336        );
47337        assert_eq_m512(r, e);
47338    }
47339
47340    #[simd_test(enable = "avx512f")]
47341    unsafe fn test_mm512_maskz_cvtph_ps() {
47342        let a = _mm256_setr_epi64x(
47343            4323521613979991040,
47344            4323521613979991040,
47345            4323521613979991040,
47346            4323521613979991040,
47347        );
47348        let r = _mm512_maskz_cvtph_ps(0, a);
47349        assert_eq_m512(r, _mm512_setzero_ps());
47350        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
47351        let e = _mm512_setr_ps(
47352            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
47353        );
47354        assert_eq_m512(r, e);
47355    }
47356
47357    #[simd_test(enable = "avx512f,avx512vl")]
47358    unsafe fn test_mm256_mask_cvtph_ps() {
47359        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47360        let src = _mm256_set1_ps(0.);
47361        let r = _mm256_mask_cvtph_ps(src, 0, a);
47362        assert_eq_m256(r, src);
47363        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
47364        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
47365        assert_eq_m256(r, e);
47366    }
47367
47368    #[simd_test(enable = "avx512f,avx512vl")]
47369    unsafe fn test_mm256_maskz_cvtph_ps() {
47370        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47371        let r = _mm256_maskz_cvtph_ps(0, a);
47372        assert_eq_m256(r, _mm256_setzero_ps());
47373        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
47374        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
47375        assert_eq_m256(r, e);
47376    }
47377
47378    #[simd_test(enable = "avx512f,avx512vl")]
47379    unsafe fn test_mm_mask_cvtph_ps() {
47380        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47381        let src = _mm_set1_ps(0.);
47382        let r = _mm_mask_cvtph_ps(src, 0, a);
47383        assert_eq_m128(r, src);
47384        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
47385        let e = _mm_setr_ps(1., 1., 1., 1.);
47386        assert_eq_m128(r, e);
47387    }
47388
47389    #[simd_test(enable = "avx512f,avx512vl")]
47390    unsafe fn test_mm_maskz_cvtph_ps() {
47391        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
47392        let r = _mm_maskz_cvtph_ps(0, a);
47393        assert_eq_m128(r, _mm_setzero_ps());
47394        let r = _mm_maskz_cvtph_ps(0b00001111, a);
47395        let e = _mm_setr_ps(1., 1., 1., 1.);
47396        assert_eq_m128(r, e);
47397    }
47398
47399    #[simd_test(enable = "avx512f")]
47400    unsafe fn test_mm512_cvtt_roundps_epi32() {
47401        let a = _mm512_setr_ps(
47402            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47403        );
47404        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
47405        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
47406        assert_eq_m512i(r, e);
47407    }
47408
47409    #[simd_test(enable = "avx512f")]
47410    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
47411        let a = _mm512_setr_ps(
47412            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47413        );
47414        let src = _mm512_set1_epi32(0);
47415        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
47416        assert_eq_m512i(r, src);
47417        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47418        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47419        assert_eq_m512i(r, e);
47420    }
47421
47422    #[simd_test(enable = "avx512f")]
47423    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
47424        let a = _mm512_setr_ps(
47425            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47426        );
47427        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
47428        assert_eq_m512i(r, _mm512_setzero_si512());
47429        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47430        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47431        assert_eq_m512i(r, e);
47432    }
47433
47434    #[simd_test(enable = "avx512f")]
47435    unsafe fn test_mm512_cvtt_roundps_epu32() {
47436        let a = _mm512_setr_ps(
47437            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47438        );
47439        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
47440        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47441        assert_eq_m512i(r, e);
47442    }
47443
47444    #[simd_test(enable = "avx512f")]
47445    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
47446        let a = _mm512_setr_ps(
47447            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47448        );
47449        let src = _mm512_set1_epi32(0);
47450        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
47451        assert_eq_m512i(r, src);
47452        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
47453        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47454        assert_eq_m512i(r, e);
47455    }
47456
47457    #[simd_test(enable = "avx512f")]
47458    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
47459        let a = _mm512_setr_ps(
47460            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47461        );
47462        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
47463        assert_eq_m512i(r, _mm512_setzero_si512());
47464        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
47465        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47466        assert_eq_m512i(r, e);
47467    }
47468
47469    #[simd_test(enable = "avx512f")]
47470    unsafe fn test_mm512_cvttps_epi32() {
47471        let a = _mm512_setr_ps(
47472            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47473        );
47474        let r = _mm512_cvttps_epi32(a);
47475        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
47476        assert_eq_m512i(r, e);
47477    }
47478
47479    #[simd_test(enable = "avx512f")]
47480    unsafe fn test_mm512_mask_cvttps_epi32() {
47481        let a = _mm512_setr_ps(
47482            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47483        );
47484        let src = _mm512_set1_epi32(0);
47485        let r = _mm512_mask_cvttps_epi32(src, 0, a);
47486        assert_eq_m512i(r, src);
47487        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
47488        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47489        assert_eq_m512i(r, e);
47490    }
47491
47492    #[simd_test(enable = "avx512f")]
47493    unsafe fn test_mm512_maskz_cvttps_epi32() {
47494        let a = _mm512_setr_ps(
47495            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47496        );
47497        let r = _mm512_maskz_cvttps_epi32(0, a);
47498        assert_eq_m512i(r, _mm512_setzero_si512());
47499        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
47500        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
47501        assert_eq_m512i(r, e);
47502    }
47503
47504    #[simd_test(enable = "avx512f,avx512vl")]
47505    unsafe fn test_mm256_mask_cvttps_epi32() {
47506        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47507        let src = _mm256_set1_epi32(0);
47508        let r = _mm256_mask_cvttps_epi32(src, 0, a);
47509        assert_eq_m256i(r, src);
47510        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
47511        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47512        assert_eq_m256i(r, e);
47513    }
47514
47515    #[simd_test(enable = "avx512f,avx512vl")]
47516    unsafe fn test_mm256_maskz_cvttps_epi32() {
47517        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47518        let r = _mm256_maskz_cvttps_epi32(0, a);
47519        assert_eq_m256i(r, _mm256_setzero_si256());
47520        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
47521        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47522        assert_eq_m256i(r, e);
47523    }
47524
47525    #[simd_test(enable = "avx512f,avx512vl")]
47526    unsafe fn test_mm_mask_cvttps_epi32() {
47527        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47528        let src = _mm_set1_epi32(0);
47529        let r = _mm_mask_cvttps_epi32(src, 0, a);
47530        assert_eq_m128i(r, src);
47531        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
47532        let e = _mm_set_epi32(12, 13, 14, 15);
47533        assert_eq_m128i(r, e);
47534    }
47535
47536    #[simd_test(enable = "avx512f,avx512vl")]
47537    unsafe fn test_mm_maskz_cvttps_epi32() {
47538        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47539        let r = _mm_maskz_cvttps_epi32(0, a);
47540        assert_eq_m128i(r, _mm_setzero_si128());
47541        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
47542        let e = _mm_set_epi32(12, 13, 14, 15);
47543        assert_eq_m128i(r, e);
47544    }
47545
47546    #[simd_test(enable = "avx512f")]
47547    unsafe fn test_mm512_cvttps_epu32() {
47548        let a = _mm512_setr_ps(
47549            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47550        );
47551        let r = _mm512_cvttps_epu32(a);
47552        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47553        assert_eq_m512i(r, e);
47554    }
47555
47556    #[simd_test(enable = "avx512f")]
47557    unsafe fn test_mm512_mask_cvttps_epu32() {
47558        let a = _mm512_setr_ps(
47559            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47560        );
47561        let src = _mm512_set1_epi32(0);
47562        let r = _mm512_mask_cvttps_epu32(src, 0, a);
47563        assert_eq_m512i(r, src);
47564        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
47565        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47566        assert_eq_m512i(r, e);
47567    }
47568
47569    #[simd_test(enable = "avx512f")]
47570    unsafe fn test_mm512_maskz_cvttps_epu32() {
47571        let a = _mm512_setr_ps(
47572            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47573        );
47574        let r = _mm512_maskz_cvttps_epu32(0, a);
47575        assert_eq_m512i(r, _mm512_setzero_si512());
47576        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
47577        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47578        assert_eq_m512i(r, e);
47579    }
47580
47581    #[simd_test(enable = "avx512f,avx512vl")]
47582    unsafe fn test_mm256_cvttps_epu32() {
47583        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47584        let r = _mm256_cvttps_epu32(a);
47585        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47586        assert_eq_m256i(r, e);
47587    }
47588
47589    #[simd_test(enable = "avx512f,avx512vl")]
47590    unsafe fn test_mm256_mask_cvttps_epu32() {
47591        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47592        let src = _mm256_set1_epi32(0);
47593        let r = _mm256_mask_cvttps_epu32(src, 0, a);
47594        assert_eq_m256i(r, src);
47595        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
47596        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47597        assert_eq_m256i(r, e);
47598    }
47599
47600    #[simd_test(enable = "avx512f,avx512vl")]
47601    unsafe fn test_mm256_maskz_cvttps_epu32() {
47602        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47603        let r = _mm256_maskz_cvttps_epu32(0, a);
47604        assert_eq_m256i(r, _mm256_setzero_si256());
47605        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
47606        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47607        assert_eq_m256i(r, e);
47608    }
47609
47610    #[simd_test(enable = "avx512f,avx512vl")]
47611    unsafe fn test_mm_cvttps_epu32() {
47612        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47613        let r = _mm_cvttps_epu32(a);
47614        let e = _mm_set_epi32(12, 13, 14, 15);
47615        assert_eq_m128i(r, e);
47616    }
47617
47618    #[simd_test(enable = "avx512f,avx512vl")]
47619    unsafe fn test_mm_mask_cvttps_epu32() {
47620        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47621        let src = _mm_set1_epi32(0);
47622        let r = _mm_mask_cvttps_epu32(src, 0, a);
47623        assert_eq_m128i(r, src);
47624        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
47625        let e = _mm_set_epi32(12, 13, 14, 15);
47626        assert_eq_m128i(r, e);
47627    }
47628
47629    #[simd_test(enable = "avx512f,avx512vl")]
47630    unsafe fn test_mm_maskz_cvttps_epu32() {
47631        let a = _mm_set_ps(12., 13.5, 14., 15.5);
47632        let r = _mm_maskz_cvttps_epu32(0, a);
47633        assert_eq_m128i(r, _mm_setzero_si128());
47634        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
47635        let e = _mm_set_epi32(12, 13, 14, 15);
47636        assert_eq_m128i(r, e);
47637    }
47638
47639    #[simd_test(enable = "avx512f")]
47640    unsafe fn test_mm512_i32gather_ps() {
47641        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
47642        // A multiplier of 4 is word-addressing
47643        #[rustfmt::skip]
47644        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47645                                      120, 128, 136, 144, 152, 160, 168, 176);
47646        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr() as *const u8);
47647        #[rustfmt::skip]
47648        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
47649                                         120., 128., 136., 144., 152., 160., 168., 176.));
47650    }
47651
47652    #[simd_test(enable = "avx512f")]
47653    unsafe fn test_mm512_mask_i32gather_ps() {
47654        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
47655        let src = _mm512_set1_ps(2.);
47656        let mask = 0b10101010_10101010;
47657        #[rustfmt::skip]
47658        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47659                                      120, 128, 136, 144, 152, 160, 168, 176);
47660        // A multiplier of 4 is word-addressing
47661        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
47662        #[rustfmt::skip]
47663        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
47664                                         2., 128., 2., 144., 2., 160., 2., 176.));
47665    }
47666
47667    #[simd_test(enable = "avx512f")]
47668    unsafe fn test_mm512_i32gather_epi32() {
47669        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
47670        // A multiplier of 4 is word-addressing
47671        #[rustfmt::skip]
47672        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47673                                      120, 128, 136, 144, 152, 160, 168, 176);
47674        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr() as *const u8);
47675        #[rustfmt::skip]
47676        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47677                                             120, 128, 136, 144, 152, 160, 168, 176));
47678    }
47679
47680    #[simd_test(enable = "avx512f")]
47681    unsafe fn test_mm512_mask_i32gather_epi32() {
47682        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
47683        let src = _mm512_set1_epi32(2);
47684        let mask = 0b10101010_10101010;
47685        let index = _mm512_setr_epi32(
47686            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
47687        );
47688        // A multiplier of 4 is word-addressing
47689        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr() as *const u8);
47690        assert_eq_m512i(
47691            r,
47692            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
47693        );
47694    }
47695
47696    #[simd_test(enable = "avx512f")]
47697    unsafe fn test_mm512_i32scatter_ps() {
47698        let mut arr = [0f32; 256];
47699        #[rustfmt::skip]
47700        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47701                                      128, 144, 160, 176, 192, 208, 224, 240);
47702        let src = _mm512_setr_ps(
47703            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
47704        );
47705        // A multiplier of 4 is word-addressing
47706        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
47707        let mut expected = [0f32; 256];
47708        for i in 0..16 {
47709            expected[i * 16] = (i + 1) as f32;
47710        }
47711        assert_eq!(&arr[..], &expected[..],);
47712    }
47713
47714    #[simd_test(enable = "avx512f")]
47715    unsafe fn test_mm512_mask_i32scatter_ps() {
47716        let mut arr = [0f32; 256];
47717        let mask = 0b10101010_10101010;
47718        #[rustfmt::skip]
47719        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47720                                      128, 144, 160, 176, 192, 208, 224, 240);
47721        let src = _mm512_setr_ps(
47722            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
47723        );
47724        // A multiplier of 4 is word-addressing
47725        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
47726        let mut expected = [0f32; 256];
47727        for i in 0..8 {
47728            expected[i * 32 + 16] = 2. * (i + 1) as f32;
47729        }
47730        assert_eq!(&arr[..], &expected[..],);
47731    }
47732
47733    #[simd_test(enable = "avx512f")]
47734    unsafe fn test_mm512_i32scatter_epi32() {
47735        let mut arr = [0i32; 256];
47736        #[rustfmt::skip]
47737
47738        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47739                                      128, 144, 160, 176, 192, 208, 224, 240);
47740        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
47741        // A multiplier of 4 is word-addressing
47742        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
47743        let mut expected = [0i32; 256];
47744        for i in 0..16 {
47745            expected[i * 16] = (i + 1) as i32;
47746        }
47747        assert_eq!(&arr[..], &expected[..],);
47748    }
47749
47750    #[simd_test(enable = "avx512f")]
47751    unsafe fn test_mm512_mask_i32scatter_epi32() {
47752        let mut arr = [0i32; 256];
47753        let mask = 0b10101010_10101010;
47754        #[rustfmt::skip]
47755        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
47756                                      128, 144, 160, 176, 192, 208, 224, 240);
47757        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
47758        // A multiplier of 4 is word-addressing
47759        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
47760        let mut expected = [0i32; 256];
47761        for i in 0..8 {
47762            expected[i * 32 + 16] = 2 * (i + 1) as i32;
47763        }
47764        assert_eq!(&arr[..], &expected[..],);
47765    }
47766
47767    #[simd_test(enable = "avx512f")]
47768    unsafe fn test_mm512_cmplt_ps_mask() {
47769        #[rustfmt::skip]
47770        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47771                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47772        let b = _mm512_set1_ps(-1.);
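        // Per 8-lane half: only f32::MIN and -100. are < -1., and the NaN lane
        // compares false, giving 0b00000101.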
47773        let m = _mm512_cmplt_ps_mask(a, b);
47774        assert_eq!(m, 0b00000101_00000101);
47775    }
47776
47777    #[simd_test(enable = "avx512f")]
47778    unsafe fn test_mm512_mask_cmplt_ps_mask() {
47779        #[rustfmt::skip]
47780        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47781                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47782        let b = _mm512_set1_ps(-1.);
47783        let mask = 0b01100110_01100110;
47784        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
47785        assert_eq!(r, 0b00000100_00000100);
47786    }
47787
47788    #[simd_test(enable = "avx512f")]
47789    unsafe fn test_mm512_cmpnlt_ps_mask() {
47790        #[rustfmt::skip]
47791        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47792                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47793        let b = _mm512_set1_ps(-1.);
47794        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
47795    }
47796
47797    #[simd_test(enable = "avx512f")]
47798    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
47799        #[rustfmt::skip]
47800        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47801                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47802        let b = _mm512_set1_ps(-1.);
47803        let mask = 0b01111010_01111010;
47804        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
47805    }
47806
47807    #[simd_test(enable = "avx512f")]
47808    unsafe fn test_mm512_cmpnle_ps_mask() {
47809        #[rustfmt::skip]
47810        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47811                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47812        let b = _mm512_set1_ps(-1.);
47813        let m = _mm512_cmpnle_ps_mask(b, a);
47814        assert_eq!(m, 0b00001101_00001101);
47815    }
47816
47817    #[simd_test(enable = "avx512f")]
47818    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
47819        #[rustfmt::skip]
47820        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47821                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47822        let b = _mm512_set1_ps(-1.);
47823        let mask = 0b01100110_01100110;
47824        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
47825        assert_eq!(r, 0b00000100_00000100);
47826    }
47827
47828    #[simd_test(enable = "avx512f")]
47829    unsafe fn test_mm512_cmple_ps_mask() {
47830        #[rustfmt::skip]
47831        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47832                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47833        let b = _mm512_set1_ps(-1.);
47834        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
47835    }
47836
47837    #[simd_test(enable = "avx512f")]
47838    unsafe fn test_mm512_mask_cmple_ps_mask() {
47839        #[rustfmt::skip]
47840        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
47841                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
47842        let b = _mm512_set1_ps(-1.);
47843        let mask = 0b01111010_01111010;
47844        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
47845    }
47846
47847    #[simd_test(enable = "avx512f")]
47848    unsafe fn test_mm512_cmpeq_ps_mask() {
47849        #[rustfmt::skip]
47850        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
47851                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
47852        #[rustfmt::skip]
47853        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
47854                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
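        // The NaN lane compares not-equal even though both operands are NaN.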
47855        let m = _mm512_cmpeq_ps_mask(b, a);
47856        assert_eq!(m, 0b11001101_11001101);
47857    }
47858
47859    #[simd_test(enable = "avx512f")]
47860    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
47861        #[rustfmt::skip]
47862        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
47863                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
47864        #[rustfmt::skip]
47865        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
47866                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
47867        let mask = 0b01111010_01111010;
47868        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
47869        assert_eq!(r, 0b01001000_01001000);
47870    }
47871
47872    #[simd_test(enable = "avx512f")]
47873    unsafe fn test_mm512_cmpneq_ps_mask() {
47874        #[rustfmt::skip]
47875        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
47876                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
47877        #[rustfmt::skip]
47878        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
47879                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
47880        let m = _mm512_cmpneq_ps_mask(b, a);
47881        assert_eq!(m, 0b00110010_00110010);
47882    }
47883
47884    #[simd_test(enable = "avx512f")]
47885    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
47886        #[rustfmt::skip]
47887        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
47888                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
47889        #[rustfmt::skip]
47890        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
47891                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
47892        let mask = 0b01111010_01111010;
47893        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
47894        assert_eq!(r, 0b00110010_00110010)
47895    }
47896
47897    #[simd_test(enable = "avx512f")]
47898    unsafe fn test_mm512_cmp_ps_mask() {
47899        #[rustfmt::skip]
47900        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
47901                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
47902        let b = _mm512_set1_ps(-1.);
47903        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
47904        assert_eq!(m, 0b00000101_00000101);
47905    }
47906
47907    #[simd_test(enable = "avx512f")]
47908    unsafe fn test_mm512_mask_cmp_ps_mask() {
47909        #[rustfmt::skip]
47910        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
47911                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
47912        let b = _mm512_set1_ps(-1.);
47913        let mask = 0b01100110_01100110;
47914        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
47915        assert_eq!(r, 0b00000100_00000100);
47916    }
47917
47918    #[simd_test(enable = "avx512f,avx512vl")]
47919    unsafe fn test_mm256_cmp_ps_mask() {
47920        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
47921        let b = _mm256_set1_ps(-1.);
47922        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
47923        assert_eq!(m, 0b00000101);
47924    }
47925
47926    #[simd_test(enable = "avx512f,avx512vl")]
47927    unsafe fn test_mm256_mask_cmp_ps_mask() {
47928        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
47929        let b = _mm256_set1_ps(-1.);
47930        let mask = 0b01100110;
47931        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
47932        assert_eq!(r, 0b00000100);
47933    }
47934
47935    #[simd_test(enable = "avx512f,avx512vl")]
47936    unsafe fn test_mm_cmp_ps_mask() {
47937        let a = _mm_set_ps(0., 1., -1., 13.);
47938        let b = _mm_set1_ps(1.);
47939        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
47940        assert_eq!(m, 0b00001010);
47941    }
47942
47943    #[simd_test(enable = "avx512f,avx512vl")]
47944    unsafe fn test_mm_mask_cmp_ps_mask() {
47945        let a = _mm_set_ps(0., 1., -1., 13.);
47946        let b = _mm_set1_ps(1.);
47947        let mask = 0b11111111;
47948        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
47949        assert_eq!(r, 0b00001010);
47950    }
47951
47952    #[simd_test(enable = "avx512f")]
47953    unsafe fn test_mm512_cmp_round_ps_mask() {
47954        #[rustfmt::skip]
47955        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
47956                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
47957        let b = _mm512_set1_ps(-1.);
47958        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
47959        assert_eq!(m, 0b00000101_00000101);
47960    }
47961
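        // Negative inputs cannot be represented as u32; the conversion returns
        // the unsigned integer indefinite value 0xFFFF_FFFF, shown here as -1.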
47962    #[simd_test(enable = "avx512f")]
47963    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
47964        #[rustfmt::skip]
47965        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
47966                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
47967        let b = _mm512_set1_ps(-1.);
47968        let mask = 0b01100110_01100110;
47969        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
47970        assert_eq!(r, 0b00000100_00000100);
47971    }
47972
47973    #[simd_test(enable = "avx512f")]
47974    unsafe fn test_mm512_cmpord_ps_mask() {
47975        #[rustfmt::skip]
47976        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
47977                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
47978        #[rustfmt::skip]
47979        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
47980                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
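        // CMPORD is true only where neither operand is NaN: lanes 0 and 2 of
        // each 8-lane half.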
47981        let m = _mm512_cmpord_ps_mask(a, b);
47982        assert_eq!(m, 0b00000101_00000101);
47983    }
47984
47985    #[simd_test(enable = "avx512f")]
47986    unsafe fn test_mm512_mask_cmpord_ps_mask() {
47987        #[rustfmt::skip]
47988        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
47989                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
47990        #[rustfmt::skip]
47991        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
47992                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
47993        let mask = 0b11000011_11000011;
47994        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
47995        assert_eq!(m, 0b00000001_00000001);
47996    }
47997
47998    #[simd_test(enable = "avx512f")]
47999    unsafe fn test_mm512_cmpunord_ps_mask() {
48000        #[rustfmt::skip]
48001        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
48002                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
48003        #[rustfmt::skip]
48004        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
48005                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
48006        let m = _mm512_cmpunord_ps_mask(a, b);
48007
48008        assert_eq!(m, 0b11111010_11111010);
48009    }
48010
48011    #[simd_test(enable = "avx512f")]
48012    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
48013        #[rustfmt::skip]
48014        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
48015                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
48016        #[rustfmt::skip]
48017        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
48018                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
48019        let mask = 0b00001111_00001111;
48020        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
48021        assert_eq!(m, 0b00001010_00001010);
48022    }
48023
48024    #[simd_test(enable = "avx512f")]
48025    unsafe fn test_mm_cmp_ss_mask() {
48026        let a = _mm_setr_ps(2., 1., 1., 1.);
48027        let b = _mm_setr_ps(1., 2., 2., 2.);
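        // Scalar compare: only lane 0 (2. >= 1.) participates, so the mask is 0 or 1.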
48028        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
48029        assert_eq!(m, 1);
48030    }
48031
48032    #[simd_test(enable = "avx512f")]
48033    unsafe fn test_mm_mask_cmp_ss_mask() {
48034        let a = _mm_setr_ps(2., 1., 1., 1.);
48035        let b = _mm_setr_ps(1., 2., 2., 2.);
48036        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
48037        assert_eq!(m, 0);
48038        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
48039        assert_eq!(m, 1);
48040    }
48041
48042    #[simd_test(enable = "avx512f")]
48043    unsafe fn test_mm_cmp_round_ss_mask() {
48044        let a = _mm_setr_ps(2., 1., 1., 1.);
48045        let b = _mm_setr_ps(1., 2., 2., 2.);
48046        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
48047        assert_eq!(m, 1);
48048    }
48049
48050    #[simd_test(enable = "avx512f")]
48051    unsafe fn test_mm_mask_cmp_round_ss_mask() {
48052        let a = _mm_setr_ps(2., 1., 1., 1.);
48053        let b = _mm_setr_ps(1., 2., 2., 2.);
48054        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
48055        assert_eq!(m, 0);
48056        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
48057        assert_eq!(m, 1);
48058    }
48059
48060    #[simd_test(enable = "avx512f")]
48061    unsafe fn test_mm_cmp_sd_mask() {
48062        let a = _mm_setr_pd(2., 1.);
48063        let b = _mm_setr_pd(1., 2.);
48064        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
48065        assert_eq!(m, 1);
48066    }
48067
48068    #[simd_test(enable = "avx512f")]
48069    unsafe fn test_mm_mask_cmp_sd_mask() {
48070        let a = _mm_setr_pd(2., 1.);
48071        let b = _mm_setr_pd(1., 2.);
48072        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
48073        assert_eq!(m, 0);
48074        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
48075        assert_eq!(m, 1);
48076    }
48077
48078    #[simd_test(enable = "avx512f")]
48079    unsafe fn test_mm_cmp_round_sd_mask() {
48080        let a = _mm_setr_pd(2., 1.);
48081        let b = _mm_setr_pd(1., 2.);
48082        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
48083        assert_eq!(m, 1);
48084    }
48085
48086    #[simd_test(enable = "avx512f")]
48087    unsafe fn test_mm_mask_cmp_round_sd_mask() {
48088        let a = _mm_setr_pd(2., 1.);
48089        let b = _mm_setr_pd(1., 2.);
48090        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
48091        assert_eq!(m, 0);
48092        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
48093        assert_eq!(m, 1);
48094    }
48095
48096    #[simd_test(enable = "avx512f")]
48097    unsafe fn test_mm512_cmplt_epu32_mask() {
48098        #[rustfmt::skip]
48099        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48100                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48101        let b = _mm512_set1_epi32(-1);
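        // As an unsigned value, -1 is u32::MAX, so every lane except those
        // holding -1 / u32::MAX compares below it.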
48102        let m = _mm512_cmplt_epu32_mask(a, b);
48103        assert_eq!(m, 0b11001111_11001111);
48104    }
48105
48106    #[simd_test(enable = "avx512f")]
48107    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
48108        #[rustfmt::skip]
48109        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48110                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48111        let b = _mm512_set1_epi32(-1);
48112        let mask = 0b01111010_01111010;
48113        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
48114        assert_eq!(r, 0b01001010_01001010);
48115    }
48116
48117    #[simd_test(enable = "avx512f,avx512vl")]
48118    unsafe fn test_mm256_cmplt_epu32_mask() {
48119        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
48120        let b = _mm256_set1_epi32(1);
48121        let r = _mm256_cmplt_epu32_mask(a, b);
48122        assert_eq!(r, 0b10000000);
48123    }
48124
48125    #[simd_test(enable = "avx512f,avx512vl")]
48126    unsafe fn test_mm256_mask_cmplt_epu32_mask() {
48127        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
48128        let b = _mm256_set1_epi32(1);
48129        let mask = 0b11111111;
48130        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
48131        assert_eq!(r, 0b10000000);
48132    }
48133
48134    #[simd_test(enable = "avx512f,avx512vl")]
48135    unsafe fn test_mm_cmplt_epu32_mask() {
48136        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48137        let b = _mm_set1_epi32(1);
48138        let r = _mm_cmplt_epu32_mask(a, b);
48139        assert_eq!(r, 0b00001000);
48140    }
48141
48142    #[simd_test(enable = "avx512f,avx512vl")]
48143    unsafe fn test_mm_mask_cmplt_epu32_mask() {
48144        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48145        let b = _mm_set1_epi32(1);
48146        let mask = 0b11111111;
48147        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
48148        assert_eq!(r, 0b00001000);
48149    }
48150
48151    #[simd_test(enable = "avx512f")]
48152    unsafe fn test_mm512_cmpgt_epu32_mask() {
48153        #[rustfmt::skip]
48154        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48155                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48156        let b = _mm512_set1_epi32(-1);
48157        let m = _mm512_cmpgt_epu32_mask(b, a);
48158        assert_eq!(m, 0b11001111_11001111);
48159    }
48160
48161    #[simd_test(enable = "avx512f")]
48162    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
48163        #[rustfmt::skip]
48164        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48165                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48166        let b = _mm512_set1_epi32(-1);
48167        let mask = 0b01111010_01111010;
48168        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
48169        assert_eq!(r, 0b01001010_01001010);
48170    }
48171
48172    #[simd_test(enable = "avx512f,avx512vl")]
48173    unsafe fn test_mm256_cmpgt_epu32_mask() {
48174        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
48175        let b = _mm256_set1_epi32(1);
48176        let r = _mm256_cmpgt_epu32_mask(a, b);
48177        assert_eq!(r, 0b00111111);
48178    }
48179
48180    #[simd_test(enable = "avx512f,avx512vl")]
48181    unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
48182        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
48183        let b = _mm256_set1_epi32(1);
48184        let mask = 0b11111111;
48185        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
48186        assert_eq!(r, 0b00111111);
48187    }
48188
48189    #[simd_test(enable = "avx512f,avx512vl")]
48190    unsafe fn test_mm_cmpgt_epu32_mask() {
48191        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48192        let b = _mm_set1_epi32(1);
48193        let r = _mm_cmpgt_epu32_mask(a, b);
48194        assert_eq!(r, 0b00000011);
48195    }
48196
48197    #[simd_test(enable = "avx512f,avx512vl")]
48198    unsafe fn test_mm_mask_cmpgt_epu32_mask() {
48199        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48200        let b = _mm_set1_epi32(1);
48201        let mask = 0b11111111;
48202        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
48203        assert_eq!(r, 0b00000011);
48204    }
48205
48206    #[simd_test(enable = "avx512f")]
48207    unsafe fn test_mm512_cmple_epu32_mask() {
48208        #[rustfmt::skip]
48209        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48210                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48211        let b = _mm512_set1_epi32(-1);
48212        assert_eq!(
48213            _mm512_cmple_epu32_mask(a, b),
48214            !_mm512_cmpgt_epu32_mask(a, b)
48215        )
48216    }
48217
48218    #[simd_test(enable = "avx512f")]
48219    unsafe fn test_mm512_mask_cmple_epu32_mask() {
48220        #[rustfmt::skip]
48221        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48222                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48223        let b = _mm512_set1_epi32(-1);
48224        let mask = 0b01111010_01111010;
48225        assert_eq!(
48226            _mm512_mask_cmple_epu32_mask(mask, a, b),
48227            0b01111010_01111010
48228        );
48229    }
48230
48231    #[simd_test(enable = "avx512f,avx512vl")]
48232    unsafe fn test_mm256_cmple_epu32_mask() {
48233        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
48234        let b = _mm256_set1_epi32(1);
48235        let r = _mm256_cmple_epu32_mask(a, b);
48236        assert_eq!(r, 0b11000000)
48237    }
48238
48239    #[simd_test(enable = "avx512f,avx512vl")]
48240    unsafe fn test_mm256_mask_cmple_epu32_mask() {
48241        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
48242        let b = _mm256_set1_epi32(1);
48243        let mask = 0b11111111;
48244        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
48245        assert_eq!(r, 0b11000000)
48246    }
48247
48248    #[simd_test(enable = "avx512f,avx512vl")]
48249    unsafe fn test_mm_cmple_epu32_mask() {
48250        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48251        let b = _mm_set1_epi32(1);
48252        let r = _mm_cmple_epu32_mask(a, b);
48253        assert_eq!(r, 0b00001100)
48254    }
48255
48256    #[simd_test(enable = "avx512f,avx512vl")]
48257    unsafe fn test_mm_mask_cmple_epu32_mask() {
48258        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48259        let b = _mm_set1_epi32(1);
48260        let mask = 0b11111111;
48261        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
48262        assert_eq!(r, 0b00001100)
48263    }
48264
48265    #[simd_test(enable = "avx512f")]
48266    unsafe fn test_mm512_cmpge_epu32_mask() {
48267        #[rustfmt::skip]
48268        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48269                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48270        let b = _mm512_set1_epi32(-1);
48271        assert_eq!(
48272            _mm512_cmpge_epu32_mask(a, b),
48273            !_mm512_cmplt_epu32_mask(a, b)
48274        )
48275    }
48276
48277    #[simd_test(enable = "avx512f")]
48278    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
48279        #[rustfmt::skip]
48280        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48281                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48282        let b = _mm512_set1_epi32(-1);
48283        let mask = 0b01111010_01111010;
48284        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
48285    }
48286
48287    #[simd_test(enable = "avx512f,avx512vl")]
48288    unsafe fn test_mm256_cmpge_epu32_mask() {
48289        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
48290        let b = _mm256_set1_epi32(1);
48291        let r = _mm256_cmpge_epu32_mask(a, b);
48292        assert_eq!(r, 0b01111111)
48293    }
48294
48295    #[simd_test(enable = "avx512f,avx512vl")]
48296    unsafe fn test_mm256_mask_cmpge_epu32_mask() {
48297        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
48298        let b = _mm256_set1_epi32(1);
48299        let mask = 0b11111111;
48300        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
48301        assert_eq!(r, 0b01111111)
48302    }
48303
48304    #[simd_test(enable = "avx512f,avx512vl")]
48305    unsafe fn test_mm_cmpge_epu32_mask() {
48306        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48307        let b = _mm_set1_epi32(1);
48308        let r = _mm_cmpge_epu32_mask(a, b);
48309        assert_eq!(r, 0b00000111)
48310    }
48311
48312    #[simd_test(enable = "avx512f,avx512vl")]
48313    unsafe fn test_mm_mask_cmpge_epu32_mask() {
48314        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
48315        let b = _mm_set1_epi32(1);
48316        let mask = 0b11111111;
48317        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
48318        assert_eq!(r, 0b00000111)
48319    }
48320
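    // `a` and `b` differ only where `a` holds -1 / `u32::MAX` and `b` holds 13 / 42,
    // so the equality mask clears bits 13, 12, 5 and 4 and keeps the rest.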
48321    #[simd_test(enable = "avx512f")]
48322    unsafe fn test_mm512_cmpeq_epu32_mask() {
48323        #[rustfmt::skip]
48324        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48325                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48326        #[rustfmt::skip]
48327        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48328                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48329        let m = _mm512_cmpeq_epu32_mask(b, a);
48330        assert_eq!(m, 0b11001111_11001111);
48331    }
48332
48333    #[simd_test(enable = "avx512f")]
48334    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
48335        #[rustfmt::skip]
48336        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48337                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48338        #[rustfmt::skip]
48339        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48340                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48341        let mask = 0b01111010_01111010;
48342        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
48343        assert_eq!(r, 0b01001010_01001010);
48344    }
48345
48346    #[simd_test(enable = "avx512f,avx512vl")]
48347    unsafe fn test_mm256_cmpeq_epu32_mask() {
48348        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48349        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48350        let m = _mm256_cmpeq_epu32_mask(b, a);
48351        assert_eq!(m, 0b11001111);
48352    }
48353
48354    #[simd_test(enable = "avx512f,avx512vl")]
48355    unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
48356        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48357        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48358        let mask = 0b01111010;
48359        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
48360        assert_eq!(r, 0b01001010);
48361    }
48362
48363    #[simd_test(enable = "avx512f,avx512vl")]
48364    unsafe fn test_mm_cmpeq_epu32_mask() {
48365        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48366        let b = _mm_set_epi32(0, 1, 13, 42);
48367        let m = _mm_cmpeq_epu32_mask(b, a);
48368        assert_eq!(m, 0b00001100);
48369    }
48370
48371    #[simd_test(enable = "avx512f,avx512vl")]
48372    unsafe fn test_mm_mask_cmpeq_epu32_mask() {
48373        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48374        let b = _mm_set_epi32(0, 1, 13, 42);
48375        let mask = 0b11111111;
48376        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
48377        assert_eq!(r, 0b00001100);
48378    }
48379
48380    #[simd_test(enable = "avx512f")]
48381    unsafe fn test_mm512_cmpneq_epu32_mask() {
48382        #[rustfmt::skip]
48383        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48384                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48385        #[rustfmt::skip]
48386        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48387                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48388        let m = _mm512_cmpneq_epu32_mask(b, a);
48389        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
48390    }
48391
48392    #[simd_test(enable = "avx512f")]
48393    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
48394        #[rustfmt::skip]
48395        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
48396                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
48397        #[rustfmt::skip]
48398        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48399                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48400        let mask = 0b01111010_01111010;
48401        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
48402        assert_eq!(r, 0b00110010_00110010);
48403    }
48404
48405    #[simd_test(enable = "avx512f,avx512vl")]
48406    unsafe fn test_mm256_cmpneq_epu32_mask() {
48407        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
48408        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
48409        let r = _mm256_cmpneq_epu32_mask(b, a);
48410        assert_eq!(r, 0b00110000);
48411    }
48412
48413    #[simd_test(enable = "avx512f,avx512vl")]
48414    unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
48415        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
48416        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
48417        let mask = 0b11111111;
48418        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
48419        assert_eq!(r, 0b00110000);
48420    }
48421
48422    #[simd_test(enable = "avx512f,avx512vl")]
48423    unsafe fn test_mm_cmpneq_epu32_mask() {
48424        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48425        let b = _mm_set_epi32(0, 1, 13, 42);
48426        let r = _mm_cmpneq_epu32_mask(b, a);
48427        assert_eq!(r, 0b00000011);
48428    }
48429
48430    #[simd_test(enable = "avx512f,avx512vl")]
48431    unsafe fn test_mm_mask_cmpneq_epu32_mask() {
48432        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48433        let b = _mm_set_epi32(0, 1, 13, 42);
48434        let mask = 0b11111111;
48435        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
48436        assert_eq!(r, 0b00000011);
48437    }
48438
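    // `_mm512_cmp_epu32_mask` selects its predicate through a const generic; with
    // `_MM_CMPINT_LT` it is an unsigned less-than, matching `_mm512_cmplt_epu32_mask`,
    // so every lane smaller than `u32::MAX` (the broadcast -1) sets its bit.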
48439    #[simd_test(enable = "avx512f")]
48440    unsafe fn test_mm512_cmp_epu32_mask() {
48441        #[rustfmt::skip]
48442        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48443                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48444        let b = _mm512_set1_epi32(-1);
48445        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
48446        assert_eq!(m, 0b11001111_11001111);
48447    }
48448
48449    #[simd_test(enable = "avx512f")]
48450    unsafe fn test_mm512_mask_cmp_epu32_mask() {
48451        #[rustfmt::skip]
48452        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48453                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48454        let b = _mm512_set1_epi32(-1);
48455        let mask = 0b01111010_01111010;
48456        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
48457        assert_eq!(r, 0b01001010_01001010);
48458    }
48459
48460    #[simd_test(enable = "avx512f,avx512vl")]
48461    unsafe fn test_mm256_cmp_epu32_mask() {
48462        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48463        let b = _mm256_set1_epi32(-1);
48464        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
48465        assert_eq!(m, 0b11001111);
48466    }
48467
48468    #[simd_test(enable = "avx512f,avx512vl")]
48469    unsafe fn test_mm256_mask_cmp_epu32_mask() {
48470        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48471        let b = _mm256_set1_epi32(-1);
48472        let mask = 0b11111111;
48473        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
48474        assert_eq!(r, 0b11001111);
48475    }
48476
48477    #[simd_test(enable = "avx512f,avx512vl")]
48478    unsafe fn test_mm_cmp_epu32_mask() {
48479        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
48480        let b = _mm_set1_epi32(1);
48481        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
48482        assert_eq!(m, 0b00001000);
48483    }
48484
48485    #[simd_test(enable = "avx512f,avx512vl")]
48486    unsafe fn test_mm_mask_cmp_epu32_mask() {
48487        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
48488        let b = _mm_set1_epi32(1);
48489        let mask = 0b11111111;
48490        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
48491        assert_eq!(r, 0b00001000);
48492    }
48493
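    // From here on the epi32 comparisons are signed, so -1 compares as a negative
    // value rather than as the largest unsigned value: only `i32::MIN` and -100 fall
    // below the -1 threshold, giving the 0b00000101 pattern in each 8-lane half.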
48494    #[simd_test(enable = "avx512f")]
48495    unsafe fn test_mm512_cmplt_epi32_mask() {
48496        #[rustfmt::skip]
48497        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48498                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48499        let b = _mm512_set1_epi32(-1);
48500        let m = _mm512_cmplt_epi32_mask(a, b);
48501        assert_eq!(m, 0b00000101_00000101);
48502    }
48503
48504    #[simd_test(enable = "avx512f")]
48505    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
48506        #[rustfmt::skip]
48507        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48508                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48509        let b = _mm512_set1_epi32(-1);
48510        let mask = 0b01100110_01100110;
48511        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
48512        assert_eq!(r, 0b00000100_00000100);
48513    }
48514
48515    #[simd_test(enable = "avx512f,avx512vl")]
48516    unsafe fn test_mm256_cmplt_epi32_mask() {
48517        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
48518        let b = _mm256_set1_epi32(-1);
48519        let r = _mm256_cmplt_epi32_mask(a, b);
48520        assert_eq!(r, 0b00000101);
48521    }
48522
48523    #[simd_test(enable = "avx512f,avx512vl")]
48524    unsafe fn test_mm256_mask_cmplt_epi32_mask() {
48525        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
48526        let b = _mm256_set1_epi32(-1);
48527        let mask = 0b11111111;
48528        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
48529        assert_eq!(r, 0b00000101);
48530    }
48531
48532    #[simd_test(enable = "avx512f,avx512vl")]
48533    unsafe fn test_mm_cmplt_epi32_mask() {
48534        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
48535        let b = _mm_set1_epi32(-1);
48536        let r = _mm_cmplt_epi32_mask(a, b);
48537        assert_eq!(r, 0b00000101);
48538    }
48539
48540    #[simd_test(enable = "avx512f,avx512vl")]
48541    unsafe fn test_mm_mask_cmplt_epi32_mask() {
48542        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
48543        let b = _mm_set1_epi32(-1);
48544        let mask = 0b11111111;
48545        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
48546        assert_eq!(r, 0b00000101);
48547    }
48548
48549    #[simd_test(enable = "avx512f")]
48550    unsafe fn test_mm512_cmpgt_epi32_mask() {
48551        #[rustfmt::skip]
48552        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48553                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48554        let b = _mm512_set1_epi32(-1);
48555        let m = _mm512_cmpgt_epi32_mask(b, a);
48556        assert_eq!(m, 0b00000101_00000101);
48557    }
48558
48559    #[simd_test(enable = "avx512f")]
48560    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
48561        #[rustfmt::skip]
48562        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48563                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48564        let b = _mm512_set1_epi32(-1);
48565        let mask = 0b01100110_01100110;
48566        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
48567        assert_eq!(r, 0b00000100_00000100);
48568    }
48569
48570    #[simd_test(enable = "avx512f,avx512vl")]
48571    unsafe fn test_mm256_cmpgt_epi32_mask() {
48572        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48573        let b = _mm256_set1_epi32(-1);
48574        let r = _mm256_cmpgt_epi32_mask(a, b);
48575        assert_eq!(r, 0b11011010);
48576    }
48577
48578    #[simd_test(enable = "avx512f,avx512vl")]
48579    unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
48580        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48581        let b = _mm256_set1_epi32(-1);
48582        let mask = 0b11111111;
48583        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
48584        assert_eq!(r, 0b11011010);
48585    }
48586
48587    #[simd_test(enable = "avx512f,avx512vl")]
48588    unsafe fn test_mm_cmpgt_epi32_mask() {
48589        let a = _mm_set_epi32(0, 1, -1, 13);
48590        let b = _mm_set1_epi32(-1);
48591        let r = _mm_cmpgt_epi32_mask(a, b);
48592        assert_eq!(r, 0b00001101);
48593    }
48594
48595    #[simd_test(enable = "avx512f,avx512vl")]
48596    unsafe fn test_mm_mask_cmpgt_epi32_mask() {
48597        let a = _mm_set_epi32(0, 1, -1, 13);
48598        let b = _mm_set1_epi32(-1);
48599        let mask = 0b11111111;
48600        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
48601        assert_eq!(r, 0b00001101);
48602    }
48603
48604    #[simd_test(enable = "avx512f")]
48605    unsafe fn test_mm512_cmple_epi32_mask() {
48606        #[rustfmt::skip]
48607        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48608                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48609        let b = _mm512_set1_epi32(-1);
48610        assert_eq!(
48611            _mm512_cmple_epi32_mask(a, b),
48612            !_mm512_cmpgt_epi32_mask(a, b)
48613        )
48614    }
48615
48616    #[simd_test(enable = "avx512f")]
48617    unsafe fn test_mm512_mask_cmple_epi32_mask() {
48618        #[rustfmt::skip]
48619        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48620                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48621        let b = _mm512_set1_epi32(-1);
48622        let mask = 0b01111010_01111010;
48623        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
48624    }
48625
48626    #[simd_test(enable = "avx512f,avx512vl")]
48627    unsafe fn test_mm256_cmple_epi32_mask() {
48628        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
48629        let b = _mm256_set1_epi32(-1);
48630        let r = _mm256_cmple_epi32_mask(a, b);
48631        assert_eq!(r, 0b00100101)
48632    }
48633
48634    #[simd_test(enable = "avx512f,avx512vl")]
48635    unsafe fn test_mm256_mask_cmple_epi32_mask() {
48636        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
48637        let b = _mm256_set1_epi32(-1);
48638        let mask = 0b11111111;
48639        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
48640        assert_eq!(r, 0b00100101)
48641    }
48642
48643    #[simd_test(enable = "avx512f,avx512vl")]
48644    unsafe fn test_mm_cmple_epi32_mask() {
48645        let a = _mm_set_epi32(0, 1, -1, 200);
48646        let b = _mm_set1_epi32(-1);
48647        let r = _mm_cmple_epi32_mask(a, b);
48648        assert_eq!(r, 0b00000010)
48649    }
48650
48651    #[simd_test(enable = "avx512f,avx512vl")]
48652    unsafe fn test_mm_mask_cmple_epi32_mask() {
48653        let a = _mm_set_epi32(0, 1, -1, 200);
48654        let b = _mm_set1_epi32(-1);
48655        let mask = 0b11111111;
48656        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
48657        assert_eq!(r, 0b00000010)
48658    }
48659
48660    #[simd_test(enable = "avx512f")]
48661    unsafe fn test_mm512_cmpge_epi32_mask() {
48662        #[rustfmt::skip]
48663        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48664                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48665        let b = _mm512_set1_epi32(-1);
48666        assert_eq!(
48667            _mm512_cmpge_epi32_mask(a, b),
48668            !_mm512_cmplt_epi32_mask(a, b)
48669        )
48670    }
48671
48672    #[simd_test(enable = "avx512f")]
48673    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
48674        #[rustfmt::skip]
48675        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
48676                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48677        let b = _mm512_set1_epi32(-1);
48678        let mask = 0b01111010_01111010;
48679        assert_eq!(
48680            _mm512_mask_cmpge_epi32_mask(mask, a, b),
48681            0b01111010_01111010
48682        );
48683    }
48684
48685    #[simd_test(enable = "avx512f,avx512vl")]
48686    unsafe fn test_mm256_cmpge_epi32_mask() {
48687        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48688        let b = _mm256_set1_epi32(-1);
48689        let r = _mm256_cmpge_epi32_mask(a, b);
48690        assert_eq!(r, 0b11111010)
48691    }
48692
48693    #[simd_test(enable = "avx512f,avx512vl")]
48694    unsafe fn test_mm256_mask_cmpge_epi32_mask() {
48695        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
48696        let b = _mm256_set1_epi32(-1);
48697        let mask = 0b11111111;
48698        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
48699        assert_eq!(r, 0b11111010)
48700    }
48701
48702    #[simd_test(enable = "avx512f,avx512vl")]
48703    unsafe fn test_mm_cmpge_epi32_mask() {
48704        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48705        let b = _mm_set1_epi32(-1);
48706        let r = _mm_cmpge_epi32_mask(a, b);
48707        assert_eq!(r, 0b00001111)
48708    }
48709
48710    #[simd_test(enable = "avx512f,avx512vl")]
48711    unsafe fn test_mm_mask_cmpge_epi32_mask() {
48712        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
48713        let b = _mm_set1_epi32(-1);
48714        let mask = 0b11111111;
48715        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
48716        assert_eq!(r, 0b00001111)
48717    }
48718
48719    #[simd_test(enable = "avx512f")]
48720    unsafe fn test_mm512_cmpeq_epi32_mask() {
48721        #[rustfmt::skip]
48722        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48723                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48724        #[rustfmt::skip]
48725        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48726                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48727        let m = _mm512_cmpeq_epi32_mask(b, a);
48728        assert_eq!(m, 0b11001111_11001111);
48729    }
48730
48731    #[simd_test(enable = "avx512f")]
48732    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
48733        #[rustfmt::skip]
48734        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48735                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48736        #[rustfmt::skip]
48737        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48738                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48739        let mask = 0b01111010_01111010;
48740        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
48741        assert_eq!(r, 0b01001010_01001010);
48742    }
48743
48744    #[simd_test(enable = "avx512f,avx512vl")]
48745    unsafe fn test_mm256_cmpeq_epi32_mask() {
48746        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48747        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48748        let m = _mm256_cmpeq_epi32_mask(b, a);
48749        assert_eq!(m, 0b11001111);
48750    }
48751
48752    #[simd_test(enable = "avx512f,avx512vl")]
48753    unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
48754        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48755        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48756        let mask = 0b01111010;
48757        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
48758        assert_eq!(r, 0b01001010);
48759    }
48760
48761    #[simd_test(enable = "avx512f,avx512vl")]
48762    unsafe fn test_mm_cmpeq_epi32_mask() {
48763        let a = _mm_set_epi32(0, 1, -1, 13);
48764        let b = _mm_set_epi32(0, 1, 13, 42);
48765        let m = _mm_cmpeq_epi32_mask(b, a);
48766        assert_eq!(m, 0b00001100);
48767    }
48768
48769    #[simd_test(enable = "avx512f,avx512vl")]
48770    unsafe fn test_mm_mask_cmpeq_epi32_mask() {
48771        let a = _mm_set_epi32(0, 1, -1, 13);
48772        let b = _mm_set_epi32(0, 1, 13, 42);
48773        let mask = 0b11111111;
48774        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
48775        assert_eq!(r, 0b00001100);
48776    }
48777
48778    #[simd_test(enable = "avx512f")]
48779    unsafe fn test_mm512_cmpneq_epi32_mask() {
48780        #[rustfmt::skip]
48781        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48782                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48783        #[rustfmt::skip]
48784        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48785                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48786        let m = _mm512_cmpneq_epi32_mask(b, a);
48787        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
48788    }
48789
48790    #[simd_test(enable = "avx512f")]
48791    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
48792        #[rustfmt::skip]
48793        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
48794                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
48795        #[rustfmt::skip]
48796        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
48797                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48798        let mask = 0b01111010_01111010;
48799        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
48800        assert_eq!(r, 0b00110010_00110010)
48801    }
48802
48803    #[simd_test(enable = "avx512f,avx512vl")]
48804    unsafe fn test_mm256_cmpneq_epi32_mask() {
48805        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48806        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48807        let m = _mm256_cmpneq_epi32_mask(b, a);
48808        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
48809    }
48810
48811    #[simd_test(enable = "avx512f,avx512vl")]
48812    unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
48813        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
48814        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
48815        let mask = 0b11111111;
48816        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
48817        assert_eq!(r, 0b00110011)
48818    }
48819
48820    #[simd_test(enable = "avx512f,avx512vl")]
48821    unsafe fn test_mm_cmpneq_epi32_mask() {
48822        let a = _mm_set_epi32(0, 1, -1, 13);
48823        let b = _mm_set_epi32(0, 1, 13, 42);
48824        let r = _mm_cmpneq_epi32_mask(b, a);
48825        assert_eq!(r, 0b00000011)
48826    }
48827
48828    #[simd_test(enable = "avx512f,avx512vl")]
48829    unsafe fn test_mm_mask_cmpneq_epi32_mask() {
48830        let a = _mm_set_epi32(0, 1, -1, 13);
48831        let b = _mm_set_epi32(0, 1, 13, 42);
48832        let mask = 0b11111111;
48833        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
48834        assert_eq!(r, 0b00000011)
48835    }
48836
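    // With `_MM_CMPINT_LT` the signed `_mm512_cmp_epi32_mask` must agree with
    // `_mm512_cmplt_epi32_mask` on the same inputs, hence the same expected masks.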
48837    #[simd_test(enable = "avx512f")]
48838    unsafe fn test_mm512_cmp_epi32_mask() {
48839        #[rustfmt::skip]
48840        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48841                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48842        let b = _mm512_set1_epi32(-1);
48843        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
48844        assert_eq!(m, 0b00000101_00000101);
48845    }
48846
48847    #[simd_test(enable = "avx512f")]
48848    unsafe fn test_mm512_mask_cmp_epi32_mask() {
48849        #[rustfmt::skip]
48850        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
48851                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48852        let b = _mm512_set1_epi32(-1);
48853        let mask = 0b01100110_01100110;
48854        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
48855        assert_eq!(r, 0b00000100_00000100);
48856    }
48857
48858    #[simd_test(enable = "avx512f,avx512vl")]
48859    unsafe fn test_mm256_cmp_epi32_mask() {
48860        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48861        let b = _mm256_set1_epi32(-1);
48862        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
48863        assert_eq!(m, 0b00000101);
48864    }
48865
48866    #[simd_test(enable = "avx512f,avx512vl")]
48867    unsafe fn test_mm256_mask_cmp_epi32_mask() {
48868        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
48869        let b = _mm256_set1_epi32(-1);
48870        let mask = 0b01100110;
48871        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
48872        assert_eq!(r, 0b00000100);
48873    }
48874
48875    #[simd_test(enable = "avx512f,avx512vl")]
48876    unsafe fn test_mm_cmp_epi32_mask() {
48877        let a = _mm_set_epi32(0, 1, -1, 13);
48878        let b = _mm_set1_epi32(1);
48879        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
48880        assert_eq!(m, 0b00001010);
48881    }
48882
48883    #[simd_test(enable = "avx512f,avx512vl")]
48884    unsafe fn test_mm_mask_cmp_epi32_mask() {
48885        let a = _mm_set_epi32(0, 1, -1, 13);
48886        let b = _mm_set1_epi32(1);
48887        let mask = 0b11111111;
48888        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
48889        assert_eq!(r, 0b00001010);
48890    }
48891
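    // The set/setr tests check argument ordering: `_mm512_set_*` lists elements from
    // the highest lane down to lane 0, while `_mm512_setr_*` takes them in memory
    // order (lane 0 first), so reversing the argument list must yield the same vector.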
48892    #[simd_test(enable = "avx512f")]
48893    unsafe fn test_mm512_set_epi8() {
48894        let r = _mm512_set1_epi8(2);
48895        assert_eq_m512i(
48896            r,
48897            _mm512_set_epi8(
48898                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48899                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48900                2, 2, 2, 2, 2, 2, 2, 2,
48901            ),
48902        )
48903    }
48904
48905    #[simd_test(enable = "avx512f")]
48906    unsafe fn test_mm512_set_epi16() {
48907        let r = _mm512_set1_epi16(2);
48908        assert_eq_m512i(
48909            r,
48910            _mm512_set_epi16(
48911                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48912                2, 2, 2, 2,
48913            ),
48914        )
48915    }
48916
48917    #[simd_test(enable = "avx512f")]
48918    unsafe fn test_mm512_set_epi32() {
48919        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48920        assert_eq_m512i(
48921            r,
48922            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
48923        )
48924    }
48925
48926    #[simd_test(enable = "avx512f")]
48927    unsafe fn test_mm512_setr_epi32() {
48928        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48929        assert_eq_m512i(
48930            r,
48931            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
48932        )
48933    }
48934
48935    #[simd_test(enable = "avx512f")]
48936    unsafe fn test_mm512_set1_epi8() {
48937        let r = _mm512_set_epi8(
48938            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48939            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48940            2, 2, 2, 2, 2, 2,
48941        );
48942        assert_eq_m512i(r, _mm512_set1_epi8(2));
48943    }
48944
48945    #[simd_test(enable = "avx512f")]
48946    unsafe fn test_mm512_set1_epi16() {
48947        let r = _mm512_set_epi16(
48948            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48949            2, 2, 2,
48950        );
48951        assert_eq_m512i(r, _mm512_set1_epi16(2));
48952    }
48953
48954    #[simd_test(enable = "avx512f")]
48955    unsafe fn test_mm512_set1_epi32() {
48956        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
48957        assert_eq_m512i(r, _mm512_set1_epi32(2));
48958    }
48959
48960    #[simd_test(enable = "avx512f")]
48961    unsafe fn test_mm512_setzero_si512() {
48962        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
48963    }
48964
48965    #[simd_test(enable = "avx512f")]
48966    unsafe fn test_mm512_setzero_epi32() {
48967        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
48968    }
48969
48970    #[simd_test(enable = "avx512f")]
48971    unsafe fn test_mm512_set_ps() {
48972        let r = _mm512_setr_ps(
48973            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48974        );
48975        assert_eq_m512(
48976            r,
48977            _mm512_set_ps(
48978                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
48979            ),
48980        )
48981    }
48982
48983    #[simd_test(enable = "avx512f")]
48984    unsafe fn test_mm512_setr_ps() {
48985        let r = _mm512_set_ps(
48986            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48987        );
48988        assert_eq_m512(
48989            r,
48990            _mm512_setr_ps(
48991                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
48992            ),
48993        )
48994    }
48995
48996    #[simd_test(enable = "avx512f")]
48997    unsafe fn test_mm512_set1_ps() {
48998        #[rustfmt::skip]
48999        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
49000                                     2., 2., 2., 2., 2., 2., 2., 2.);
49001        assert_eq_m512(expected, _mm512_set1_ps(2.));
49002    }
49003
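    // `_mm512_set4_*` repeats its four arguments across every 128-bit chunk of the
    // vector; `_mm512_setr4_*` does the same with the argument order reversed, so
    // `set4(4, 3, 2, 1)` and `setr4(1, 2, 3, 4)` must produce identical vectors.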
49004    #[simd_test(enable = "avx512f")]
49005    unsafe fn test_mm512_set4_epi32() {
49006        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
49007        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
49008    }
49009
49010    #[simd_test(enable = "avx512f")]
49011    unsafe fn test_mm512_set4_ps() {
49012        let r = _mm512_set_ps(
49013            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
49014        );
49015        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
49016    }
49017
49018    #[simd_test(enable = "avx512f")]
49019    unsafe fn test_mm512_setr4_epi32() {
49020        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
49021        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
49022    }
49023
49024    #[simd_test(enable = "avx512f")]
49025    unsafe fn test_mm512_setr4_ps() {
49026        let r = _mm512_set_ps(
49027            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
49028        );
49029        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
49030    }
49031
49032    #[simd_test(enable = "avx512f")]
49033    unsafe fn test_mm512_setzero_ps() {
49034        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
49035    }
49036
49037    #[simd_test(enable = "avx512f")]
49038    unsafe fn test_mm512_setzero() {
49039        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
49040    }
49041
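    // Unaligned load/store tests: `_mm512_loadu_*` and `_mm512_storeu_*` accept any
    // pointer alignment. `black_box` hides the pointer from the optimizer so the
    // load is actually performed by the intrinsic rather than constant-folded.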
49042    #[simd_test(enable = "avx512f")]
49043    unsafe fn test_mm512_loadu_pd() {
49044        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
49045        let p = a.as_ptr();
49046        let r = _mm512_loadu_pd(black_box(p));
49047        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
49048        assert_eq_m512d(r, e);
49049    }
49050
49051    #[simd_test(enable = "avx512f")]
49052    unsafe fn test_mm512_storeu_pd() {
49053        let a = _mm512_set1_pd(9.);
49054        let mut r = _mm512_undefined_pd();
49055        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
49056        assert_eq_m512d(r, a);
49057    }
49058
49059    #[simd_test(enable = "avx512f")]
49060    unsafe fn test_mm512_loadu_ps() {
49061        let a = &[
49062            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
49063        ];
49064        let p = a.as_ptr();
49065        let r = _mm512_loadu_ps(black_box(p));
49066        let e = _mm512_setr_ps(
49067            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
49068        );
49069        assert_eq_m512(r, e);
49070    }
49071
49072    #[simd_test(enable = "avx512f")]
49073    unsafe fn test_mm512_storeu_ps() {
49074        let a = _mm512_set1_ps(9.);
49075        let mut r = _mm512_undefined_ps();
49076        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
49077        assert_eq_m512(r, a);
49078    }
49079
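    // Masked loads read lane i from memory only when mask bit i is set; cleared bits
    // take the lane from `src` (mask form) or zero it (maskz form). With
    // 0b11101000_11001010, lanes 1, 3, 6, 7, 11, 13, 14 and 15 come from memory.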
49080    #[simd_test(enable = "avx512f")]
49081    unsafe fn test_mm512_mask_loadu_epi32() {
49082        let src = _mm512_set1_epi32(42);
49083        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
49084        let p = a.as_ptr();
49085        let m = 0b11101000_11001010;
49086        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
49087        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49088        assert_eq_m512i(r, e);
49089    }
49090
49091    #[simd_test(enable = "avx512f")]
49092    unsafe fn test_mm512_maskz_loadu_epi32() {
49093        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
49094        let p = a.as_ptr();
49095        let m = 0b11101000_11001010;
49096        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
49097        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
49098        assert_eq_m512i(r, e);
49099    }
49100
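    // The aligned `_mm512_mask_load_*` / `_mm512_maskz_load_*` forms require the
    // pointer to be 64-byte aligned, which the `#[repr(align(64))]` wrapper
    // guarantees; otherwise the semantics match the unaligned tests above.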
49101    #[simd_test(enable = "avx512f")]
49102    unsafe fn test_mm512_mask_load_epi32() {
49103        #[repr(align(64))]
49104        struct Align {
49105            data: [i32; 16], // 64 bytes
49106        }
49107        let src = _mm512_set1_epi32(42);
49108        let a = Align {
49109            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
49110        };
49111        let p = a.data.as_ptr();
49112        let m = 0b11101000_11001010;
49113        let r = _mm512_mask_load_epi32(src, m, black_box(p));
49114        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49115        assert_eq_m512i(r, e);
49116    }
49117
49118    #[simd_test(enable = "avx512f")]
49119    unsafe fn test_mm512_maskz_load_epi32() {
49120        #[repr(align(64))]
49121        struct Align {
49122            data: [i32; 16], // 64 bytes
49123        }
49124        let a = Align {
49125            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
49126        };
49127        let p = a.data.as_ptr();
49128        let m = 0b11101000_11001010;
49129        let r = _mm512_maskz_load_epi32(m, black_box(p));
49130        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
49131        assert_eq_m512i(r, e);
49132    }
49133
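    // Masked stores write only the lanes whose mask bit is set; the remaining
    // destination lanes keep their previous contents (the 42 fill values here).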
49134    #[simd_test(enable = "avx512f")]
49135    unsafe fn test_mm512_mask_storeu_epi32() {
49136        let mut r = [42_i32; 16];
49137        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49138        let m = 0b11101000_11001010;
49139        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
49140        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49141        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
49142    }
49143
49144    #[simd_test(enable = "avx512f")]
49145    unsafe fn test_mm512_mask_store_epi32() {
49146        #[repr(align(64))]
49147        struct Align {
49148            data: [i32; 16],
49149        }
49150        let mut r = Align { data: [42; 16] };
49151        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
49152        let m = 0b11101000_11001010;
49153        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
49154        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
49155        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
49156    }
49157
49158    #[simd_test(enable = "avx512f")]
49159    unsafe fn test_mm512_mask_loadu_epi64() {
49160        let src = _mm512_set1_epi64(42);
49161        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
49162        let p = a.as_ptr();
49163        let m = 0b11001010;
49164        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
49165        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49166        assert_eq_m512i(r, e);
49167    }
49168
49169    #[simd_test(enable = "avx512f")]
49170    unsafe fn test_mm512_maskz_loadu_epi64() {
49171        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
49172        let p = a.as_ptr();
49173        let m = 0b11001010;
49174        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
49175        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
49176        assert_eq_m512i(r, e);
49177    }
49178
49179    #[simd_test(enable = "avx512f")]
49180    unsafe fn test_mm512_mask_load_epi64() {
49181        #[repr(align(64))]
49182        struct Align {
49183            data: [i64; 8], // 64 bytes
49184        }
49185        let src = _mm512_set1_epi64(42);
49186        let a = Align {
49187            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
49188        };
49189        let p = a.data.as_ptr();
49190        let m = 0b11001010;
49191        let r = _mm512_mask_load_epi64(src, m, black_box(p));
49192        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49193        assert_eq_m512i(r, e);
49194    }
49195
49196    #[simd_test(enable = "avx512f")]
49197    unsafe fn test_mm512_maskz_load_epi64() {
49198        #[repr(align(64))]
49199        struct Align {
49200            data: [i64; 8], // 64 bytes
49201        }
49202        let a = Align {
49203            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
49204        };
49205        let p = a.data.as_ptr();
49206        let m = 0b11001010;
49207        let r = _mm512_maskz_load_epi64(m, black_box(p));
49208        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
49209        assert_eq_m512i(r, e);
49210    }
49211
49212    #[simd_test(enable = "avx512f")]
49213    unsafe fn test_mm512_mask_storeu_epi64() {
49214        let mut r = [42_i64; 8];
49215        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
49216        let m = 0b11001010;
49217        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
49218        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49219        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
49220    }
49221
49222    #[simd_test(enable = "avx512f")]
49223    unsafe fn test_mm512_mask_store_epi64() {
49224        #[repr(align(64))]
49225        struct Align {
49226            data: [i64; 8],
49227        }
49228        let mut r = Align { data: [42; 8] };
49229        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
49230        let m = 0b11001010;
49231        let p = r.data.as_mut_ptr();
49232        _mm512_mask_store_epi64(p, m, a);
49233        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
49234        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
49235    }
49236
49237    #[simd_test(enable = "avx512f")]
49238    unsafe fn test_mm512_mask_loadu_ps() {
49239        let src = _mm512_set1_ps(42.0);
49240        let a = &[
49241            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
49242            16.0,
49243        ];
49244        let p = a.as_ptr();
49245        let m = 0b11101000_11001010;
49246        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
49247        let e = _mm512_setr_ps(
49248            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49249            16.0,
49250        );
49251        assert_eq_m512(r, e);
49252    }
49253
49254    #[simd_test(enable = "avx512f")]
49255    unsafe fn test_mm512_maskz_loadu_ps() {
49256        let a = &[
49257            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
49258            16.0,
49259        ];
49260        let p = a.as_ptr();
49261        let m = 0b11101000_11001010;
49262        let r = _mm512_maskz_loadu_ps(m, black_box(p));
49263        let e = _mm512_setr_ps(
49264            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
49265        );
49266        assert_eq_m512(r, e);
49267    }
49268
49269    #[simd_test(enable = "avx512f")]
49270    unsafe fn test_mm512_mask_load_ps() {
49271        #[repr(align(64))]
49272        struct Align {
49273            data: [f32; 16], // 64 bytes
49274        }
49275        let src = _mm512_set1_ps(42.0);
49276        let a = Align {
49277            data: [
49278                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
49279                15.0, 16.0,
49280            ],
49281        };
49282        let p = a.data.as_ptr();
49283        let m = 0b11101000_11001010;
49284        let r = _mm512_mask_load_ps(src, m, black_box(p));
49285        let e = _mm512_setr_ps(
49286            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49287            16.0,
49288        );
49289        assert_eq_m512(r, e);
49290    }
49291
49292    #[simd_test(enable = "avx512f")]
49293    unsafe fn test_mm512_maskz_load_ps() {
49294        #[repr(align(64))]
49295        struct Align {
49296            data: [f32; 16], // 64 bytes
49297        }
49298        let a = Align {
49299            data: [
49300                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
49301                15.0, 16.0,
49302            ],
49303        };
49304        let p = a.data.as_ptr();
49305        let m = 0b11101000_11001010;
49306        let r = _mm512_maskz_load_ps(m, black_box(p));
49307        let e = _mm512_setr_ps(
49308            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
49309        );
49310        assert_eq_m512(r, e);
49311    }
49312
49313    #[simd_test(enable = "avx512f")]
49314    unsafe fn test_mm512_mask_storeu_ps() {
49315        let mut r = [42_f32; 16];
49316        let a = _mm512_setr_ps(
49317            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
49318        );
49319        let m = 0b11101000_11001010;
49320        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
49321        let e = _mm512_setr_ps(
49322            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49323            16.0,
49324        );
49325        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
49326    }
49327
49328    #[simd_test(enable = "avx512f")]
49329    unsafe fn test_mm512_mask_store_ps() {
49330        #[repr(align(64))]
49331        struct Align {
49332            data: [f32; 16],
49333        }
49334        let mut r = Align { data: [42.0; 16] };
49335        let a = _mm512_setr_ps(
49336            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
49337        );
49338        let m = 0b11101000_11001010;
49339        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
49340        let e = _mm512_setr_ps(
49341            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
49342            16.0,
49343        );
49344        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
49345    }
49346
49347    #[simd_test(enable = "avx512f")]
49348    unsafe fn test_mm512_mask_loadu_pd() {
49349        let src = _mm512_set1_pd(42.0);
49350        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49351        let p = a.as_ptr();
49352        let m = 0b11001010;
49353        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
49354        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49355        assert_eq_m512d(r, e);
49356    }
49357
49358    #[simd_test(enable = "avx512f")]
49359    unsafe fn test_mm512_maskz_loadu_pd() {
49360        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49361        let p = a.as_ptr();
49362        let m = 0b11001010;
49363        let r = _mm512_maskz_loadu_pd(m, black_box(p));
49364        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49365        assert_eq_m512d(r, e);
49366    }
49367
49368    #[simd_test(enable = "avx512f")]
49369    unsafe fn test_mm512_mask_load_pd() {
49370        #[repr(align(64))]
49371        struct Align {
49372            data: [f64; 8], // 64 bytes
49373        }
49374        let src = _mm512_set1_pd(42.0);
49375        let a = Align {
49376            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49377        };
49378        let p = a.data.as_ptr();
49379        let m = 0b11001010;
49380        let r = _mm512_mask_load_pd(src, m, black_box(p));
49381        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49382        assert_eq_m512d(r, e);
49383    }
49384
49385    #[simd_test(enable = "avx512f")]
49386    unsafe fn test_mm512_maskz_load_pd() {
49387        #[repr(align(64))]
49388        struct Align {
49389            data: [f64; 8], // 64 bytes
49390        }
49391        let a = Align {
49392            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49393        };
49394        let p = a.data.as_ptr();
49395        let m = 0b11001010;
49396        let r = _mm512_maskz_load_pd(m, black_box(p));
49397        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49398        assert_eq_m512d(r, e);
49399    }
49400
49401    #[simd_test(enable = "avx512f")]
49402    unsafe fn test_mm512_mask_storeu_pd() {
49403        let mut r = [42_f64; 8];
49404        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49405        let m = 0b11001010;
49406        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
49407        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49408        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
49409    }
49410
49411    #[simd_test(enable = "avx512f")]
49412    unsafe fn test_mm512_mask_store_pd() {
49413        #[repr(align(64))]
49414        struct Align {
49415            data: [f64; 8],
49416        }
49417        let mut r = Align { data: [42.0; 8] };
49418        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49419        let m = 0b11001010;
49420        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
49421        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49422        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
49423    }
49424
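    // The same masked load/store checks repeated for the 256-bit (avx512vl) forms:
    // 8 or 4 lanes per vector, and the aligned variants need only 32-byte alignment.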
49425    #[simd_test(enable = "avx512f,avx512vl")]
49426    unsafe fn test_mm256_mask_loadu_epi32() {
49427        let src = _mm256_set1_epi32(42);
49428        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
49429        let p = a.as_ptr();
49430        let m = 0b11001010;
49431        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
49432        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49433        assert_eq_m256i(r, e);
49434    }
49435
49436    #[simd_test(enable = "avx512f,avx512vl")]
49437    unsafe fn test_mm256_maskz_loadu_epi32() {
49438        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
49439        let p = a.as_ptr();
49440        let m = 0b11001010;
49441        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
49442        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
49443        assert_eq_m256i(r, e);
49444    }
49445
49446    #[simd_test(enable = "avx512f,avx512vl")]
49447    unsafe fn test_mm256_mask_load_epi32() {
49448        #[repr(align(32))]
49449        struct Align {
49450            data: [i32; 8], // 32 bytes
49451        }
49452        let src = _mm256_set1_epi32(42);
49453        let a = Align {
49454            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
49455        };
49456        let p = a.data.as_ptr();
49457        let m = 0b11001010;
49458        let r = _mm256_mask_load_epi32(src, m, black_box(p));
49459        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49460        assert_eq_m256i(r, e);
49461    }
49462
49463    #[simd_test(enable = "avx512f,avx512vl")]
49464    unsafe fn test_mm256_maskz_load_epi32() {
49465        #[repr(align(32))]
49466        struct Align {
49467            data: [i32; 8], // 32 bytes
49468        }
49469        let a = Align {
49470            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
49471        };
49472        let p = a.data.as_ptr();
49473        let m = 0b11001010;
49474        let r = _mm256_maskz_load_epi32(m, black_box(p));
49475        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
49476        assert_eq_m256i(r, e);
49477    }
49478
49479    #[simd_test(enable = "avx512f,avx512vl")]
49480    unsafe fn test_mm256_mask_storeu_epi32() {
49481        let mut r = [42_i32; 8];
49482        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
49483        let m = 0b11001010;
49484        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
49485        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49486        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
49487    }
49488
49489    #[simd_test(enable = "avx512f,avx512vl")]
49490    unsafe fn test_mm256_mask_store_epi32() {
49491        #[repr(align(64))]
49492        struct Align {
49493            data: [i32; 8],
49494        }
49495        let mut r = Align { data: [42; 8] };
49496        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
49497        let m = 0b11001010;
49498        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
49499        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
49500        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
49501    }
49502
49503    #[simd_test(enable = "avx512f,avx512vl")]
49504    unsafe fn test_mm256_mask_loadu_epi64() {
49505        let src = _mm256_set1_epi64x(42);
49506        let a = &[1_i64, 2, 3, 4];
49507        let p = a.as_ptr();
49508        let m = 0b1010;
49509        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
49510        let e = _mm256_setr_epi64x(42, 2, 42, 4);
49511        assert_eq_m256i(r, e);
49512    }
49513
49514    #[simd_test(enable = "avx512f,avx512vl")]
49515    unsafe fn test_mm256_maskz_loadu_epi64() {
49516        let a = &[1_i64, 2, 3, 4];
49517        let p = a.as_ptr();
49518        let m = 0b1010;
49519        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
49520        let e = _mm256_setr_epi64x(0, 2, 0, 4);
49521        assert_eq_m256i(r, e);
49522    }
49523
49524    #[simd_test(enable = "avx512f,avx512vl")]
49525    unsafe fn test_mm256_mask_load_epi64() {
49526        #[repr(align(32))]
49527        struct Align {
49528            data: [i64; 4], // 32 bytes
49529        }
49530        let src = _mm256_set1_epi64x(42);
49531        let a = Align {
49532            data: [1_i64, 2, 3, 4],
49533        };
49534        let p = a.data.as_ptr();
49535        let m = 0b1010;
49536        let r = _mm256_mask_load_epi64(src, m, black_box(p));
49537        let e = _mm256_setr_epi64x(42, 2, 42, 4);
49538        assert_eq_m256i(r, e);
49539    }
49540
49541    #[simd_test(enable = "avx512f,avx512vl")]
49542    unsafe fn test_mm256_maskz_load_epi64() {
49543        #[repr(align(32))]
49544        struct Align {
49545            data: [i64; 4], // 32 bytes
49546        }
49547        let a = Align {
49548            data: [1_i64, 2, 3, 4],
49549        };
49550        let p = a.data.as_ptr();
49551        let m = 0b1010;
49552        let r = _mm256_maskz_load_epi64(m, black_box(p));
49553        let e = _mm256_setr_epi64x(0, 2, 0, 4);
49554        assert_eq_m256i(r, e);
49555    }
49556
49557    #[simd_test(enable = "avx512f,avx512vl")]
49558    unsafe fn test_mm256_mask_storeu_epi64() {
49559        let mut r = [42_i64; 4];
49560        let a = _mm256_setr_epi64x(1, 2, 3, 4);
49561        let m = 0b1010;
49562        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
49563        let e = _mm256_setr_epi64x(42, 2, 42, 4);
49564        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
49565    }
49566
49567    #[simd_test(enable = "avx512f,avx512vl")]
49568    unsafe fn test_mm256_mask_store_epi64() {
49569        #[repr(align(32))]
49570        struct Align {
49571            data: [i64; 4],
49572        }
49573        let mut r = Align { data: [42; 4] };
49574        let a = _mm256_setr_epi64x(1, 2, 3, 4);
49575        let m = 0b1010;
49576        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
49577        let e = _mm256_setr_epi64x(42, 2, 42, 4);
49578        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
49579    }
49580
49581    #[simd_test(enable = "avx512f,avx512vl")]
49582    unsafe fn test_mm256_mask_loadu_ps() {
49583        let src = _mm256_set1_ps(42.0);
49584        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49585        let p = a.as_ptr();
49586        let m = 0b11001010;
49587        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
49588        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49589        assert_eq_m256(r, e);
49590    }
49591
49592    #[simd_test(enable = "avx512f,avx512vl")]
49593    unsafe fn test_mm256_maskz_loadu_ps() {
49594        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
49595        let p = a.as_ptr();
49596        let m = 0b11001010;
49597        let r = _mm256_maskz_loadu_ps(m, black_box(p));
49598        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49599        assert_eq_m256(r, e);
49600    }
49601
49602    #[simd_test(enable = "avx512f,avx512vl")]
49603    unsafe fn test_mm256_mask_load_ps() {
49604        #[repr(align(32))]
49605        struct Align {
49606            data: [f32; 8], // 32 bytes
49607        }
49608        let src = _mm256_set1_ps(42.0);
49609        let a = Align {
49610            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49611        };
49612        let p = a.data.as_ptr();
49613        let m = 0b11001010;
49614        let r = _mm256_mask_load_ps(src, m, black_box(p));
49615        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49616        assert_eq_m256(r, e);
49617    }
49618
49619    #[simd_test(enable = "avx512f,avx512vl")]
49620    unsafe fn test_mm256_maskz_load_ps() {
49621        #[repr(align(32))]
49622        struct Align {
49623            data: [f32; 8], // 32 bytes
49624        }
49625        let a = Align {
49626            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
49627        };
49628        let p = a.data.as_ptr();
49629        let m = 0b11001010;
49630        let r = _mm256_maskz_load_ps(m, black_box(p));
49631        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
49632        assert_eq_m256(r, e);
49633    }
49634
49635    #[simd_test(enable = "avx512f,avx512vl")]
49636    unsafe fn test_mm256_mask_storeu_ps() {
49637        let mut r = [42_f32; 8];
49638        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49639        let m = 0b11001010;
49640        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
49641        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49642        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
49643    }
49644
49645    #[simd_test(enable = "avx512f,avx512vl")]
49646    unsafe fn test_mm256_mask_store_ps() {
49647        #[repr(align(32))]
49648        struct Align {
49649            data: [f32; 8],
49650        }
49651        let mut r = Align { data: [42.0; 8] };
49652        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
49653        let m = 0b11001010;
49654        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
49655        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
49656        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
49657    }
49658
49659    #[simd_test(enable = "avx512f,avx512vl")]
49660    unsafe fn test_mm256_mask_loadu_pd() {
49661        let src = _mm256_set1_pd(42.0);
49662        let a = &[1.0_f64, 2.0, 3.0, 4.0];
49663        let p = a.as_ptr();
49664        let m = 0b1010;
49665        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
49666        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49667        assert_eq_m256d(r, e);
49668    }
49669
49670    #[simd_test(enable = "avx512f,avx512vl")]
49671    unsafe fn test_mm256_maskz_loadu_pd() {
49672        let a = &[1.0_f64, 2.0, 3.0, 4.0];
49673        let p = a.as_ptr();
49674        let m = 0b1010;
49675        let r = _mm256_maskz_loadu_pd(m, black_box(p));
49676        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
49677        assert_eq_m256d(r, e);
49678    }
49679
49680    #[simd_test(enable = "avx512f,avx512vl")]
49681    unsafe fn test_mm256_mask_load_pd() {
49682        #[repr(align(32))]
49683        struct Align {
49684            data: [f64; 4], // 32 bytes
49685        }
49686        let src = _mm256_set1_pd(42.0);
49687        let a = Align {
49688            data: [1.0_f64, 2.0, 3.0, 4.0],
49689        };
49690        let p = a.data.as_ptr();
49691        let m = 0b1010;
49692        let r = _mm256_mask_load_pd(src, m, black_box(p));
49693        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49694        assert_eq_m256d(r, e);
49695    }
49696
49697    #[simd_test(enable = "avx512f,avx512vl")]
49698    unsafe fn test_mm256_maskz_load_pd() {
49699        #[repr(align(32))]
49700        struct Align {
49701            data: [f64; 4], // 32 bytes
49702        }
49703        let a = Align {
49704            data: [1.0_f64, 2.0, 3.0, 4.0],
49705        };
49706        let p = a.data.as_ptr();
49707        let m = 0b1010;
49708        let r = _mm256_maskz_load_pd(m, black_box(p));
49709        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
49710        assert_eq_m256d(r, e);
49711    }
49712
49713    #[simd_test(enable = "avx512f,avx512vl")]
49714    unsafe fn test_mm256_mask_storeu_pd() {
49715        let mut r = [42_f64; 4];
49716        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
49717        let m = 0b1010;
49718        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
49719        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49720        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
49721    }
49722
49723    #[simd_test(enable = "avx512f,avx512vl")]
49724    unsafe fn test_mm256_mask_store_pd() {
49725        #[repr(align(32))]
49726        struct Align {
49727            data: [f64; 4],
49728        }
49729        let mut r = Align { data: [42.0; 4] };
49730        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
49731        let m = 0b1010;
49732        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
49733        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
49734        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
49735    }
49736
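    // 128-bit masked load/store tests: only the low 4 (epi32/ps) or low 2 (epi64/pd) bits
    // of the mask are used; lanes whose mask bit is clear are taken from `src` (mask
    // variants) or zeroed (maskz variants).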
49737    #[simd_test(enable = "avx512f,avx512vl")]
49738    unsafe fn test_mm_mask_loadu_epi32() {
49739        let src = _mm_set1_epi32(42);
49740        let a = &[1_i32, 2, 3, 4];
49741        let p = a.as_ptr();
49742        let m = 0b1010;
49743        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
49744        let e = _mm_setr_epi32(42, 2, 42, 4);
49745        assert_eq_m128i(r, e);
49746    }
49747
49748    #[simd_test(enable = "avx512f,avx512vl")]
49749    unsafe fn test_mm_maskz_loadu_epi32() {
49750        let a = &[1_i32, 2, 3, 4];
49751        let p = a.as_ptr();
49752        let m = 0b1010;
49753        let r = _mm_maskz_loadu_epi32(m, black_box(p));
49754        let e = _mm_setr_epi32(0, 2, 0, 4);
49755        assert_eq_m128i(r, e);
49756    }
49757
49758    #[simd_test(enable = "avx512f,avx512vl")]
49759    unsafe fn test_mm_mask_load_epi32() {
49760        #[repr(align(16))]
49761        struct Align {
49762            data: [i32; 4], // 16 bytes
49763        }
49764        let src = _mm_set1_epi32(42);
49765        let a = Align {
49766            data: [1_i32, 2, 3, 4],
49767        };
49768        let p = a.data.as_ptr();
49769        let m = 0b1010;
49770        let r = _mm_mask_load_epi32(src, m, black_box(p));
49771        let e = _mm_setr_epi32(42, 2, 42, 4);
49772        assert_eq_m128i(r, e);
49773    }
49774
49775    #[simd_test(enable = "avx512f,avx512vl")]
49776    unsafe fn test_mm_maskz_load_epi32() {
49777        #[repr(align(16))]
49778        struct Align {
49779            data: [i32; 4], // 16 bytes
49780        }
49781        let a = Align {
49782            data: [1_i32, 2, 3, 4],
49783        };
49784        let p = a.data.as_ptr();
49785        let m = 0b1010;
49786        let r = _mm_maskz_load_epi32(m, black_box(p));
49787        let e = _mm_setr_epi32(0, 2, 0, 4);
49788        assert_eq_m128i(r, e);
49789    }
49790
49791    #[simd_test(enable = "avx512f,avx512vl")]
49792    unsafe fn test_mm_mask_storeu_epi32() {
49793        let mut r = [42_i32; 4];
49794        let a = _mm_setr_epi32(1, 2, 3, 4);
49795        let m = 0b1010;
49796        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
49797        let e = _mm_setr_epi32(42, 2, 42, 4);
49798        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
49799    }
49800
49801    #[simd_test(enable = "avx512f,avx512vl")]
49802    unsafe fn test_mm_mask_store_epi32() {
49803        #[repr(align(16))]
49804        struct Align {
49805            data: [i32; 4], // 16 bytes
49806        }
49807        let mut r = Align { data: [42; 4] };
49808        let a = _mm_setr_epi32(1, 2, 3, 4);
49809        let m = 0b1010;
49810        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
49811        let e = _mm_setr_epi32(42, 2, 42, 4);
49812        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
49813    }
49814
49815    #[simd_test(enable = "avx512f,avx512vl")]
49816    unsafe fn test_mm_mask_loadu_epi64() {
49817        let src = _mm_set1_epi64x(42);
49818        let a = &[1_i64, 2];
49819        let p = a.as_ptr();
49820        let m = 0b10;
49821        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
49822        let e = _mm_setr_epi64x(42, 2);
49823        assert_eq_m128i(r, e);
49824    }
49825
49826    #[simd_test(enable = "avx512f,avx512vl")]
49827    unsafe fn test_mm_maskz_loadu_epi64() {
49828        let a = &[1_i64, 2];
49829        let p = a.as_ptr();
49830        let m = 0b10;
49831        let r = _mm_maskz_loadu_epi64(m, black_box(p));
49832        let e = _mm_setr_epi64x(0, 2);
49833        assert_eq_m128i(r, e);
49834    }
49835
49836    #[simd_test(enable = "avx512f,avx512vl")]
49837    unsafe fn test_mm_mask_load_epi64() {
49838        #[repr(align(16))]
49839        struct Align {
49840            data: [i64; 2], // 16 bytes
49841        }
49842        let src = _mm_set1_epi64x(42);
49843        let a = Align { data: [1_i64, 2] };
49844        let p = a.data.as_ptr();
49845        let m = 0b10;
49846        let r = _mm_mask_load_epi64(src, m, black_box(p));
49847        let e = _mm_setr_epi64x(42, 2);
49848        assert_eq_m128i(r, e);
49849    }
49850
49851    #[simd_test(enable = "avx512f,avx512vl")]
49852    unsafe fn test_mm_maskz_load_epi64() {
49853        #[repr(align(16))]
49854        struct Align {
49855            data: [i64; 2], // 16 bytes
49856        }
49857        let a = Align { data: [1_i64, 2] };
49858        let p = a.data.as_ptr();
49859        let m = 0b10;
49860        let r = _mm_maskz_load_epi64(m, black_box(p));
49861        let e = _mm_setr_epi64x(0, 2);
49862        assert_eq_m128i(r, e);
49863    }
49864
49865    #[simd_test(enable = "avx512f,avx512vl")]
49866    unsafe fn test_mm_mask_storeu_epi64() {
49867        let mut r = [42_i64; 2];
49868        let a = _mm_setr_epi64x(1, 2);
49869        let m = 0b10;
49870        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
49871        let e = _mm_setr_epi64x(42, 2);
49872        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
49873    }
49874
49875    #[simd_test(enable = "avx512f,avx512vl")]
49876    unsafe fn test_mm_mask_store_epi64() {
49877        #[repr(align(16))]
49878        struct Align {
49879            data: [i64; 2], // 16 bytes
49880        }
49881        let mut r = Align { data: [42; 2] };
49882        let a = _mm_setr_epi64x(1, 2);
49883        let m = 0b10;
49884        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
49885        let e = _mm_setr_epi64x(42, 2);
49886        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
49887    }
49888
49889    #[simd_test(enable = "avx512f,avx512vl")]
49890    unsafe fn test_mm_mask_loadu_ps() {
49891        let src = _mm_set1_ps(42.0);
49892        let a = &[1.0_f32, 2.0, 3.0, 4.0];
49893        let p = a.as_ptr();
49894        let m = 0b1010;
49895        let r = _mm_mask_loadu_ps(src, m, black_box(p));
49896        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
49897        assert_eq_m128(r, e);
49898    }
49899
49900    #[simd_test(enable = "avx512f,avx512vl")]
49901    unsafe fn test_mm_maskz_loadu_ps() {
49902        let a = &[1.0_f32, 2.0, 3.0, 4.0];
49903        let p = a.as_ptr();
49904        let m = 0b1010;
49905        let r = _mm_maskz_loadu_ps(m, black_box(p));
49906        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
49907        assert_eq_m128(r, e);
49908    }
49909
49910    #[simd_test(enable = "avx512f,avx512vl")]
49911    unsafe fn test_mm_mask_load_ps() {
49912        #[repr(align(16))]
49913        struct Align {
49914            data: [f32; 4], // 16 bytes
49915        }
49916        let src = _mm_set1_ps(42.0);
49917        let a = Align {
49918            data: [1.0_f32, 2.0, 3.0, 4.0],
49919        };
49920        let p = a.data.as_ptr();
49921        let m = 0b1010;
49922        let r = _mm_mask_load_ps(src, m, black_box(p));
49923        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
49924        assert_eq_m128(r, e);
49925    }
49926
49927    #[simd_test(enable = "avx512f,avx512vl")]
49928    unsafe fn test_mm_maskz_load_ps() {
49929        #[repr(align(16))]
49930        struct Align {
49931            data: [f32; 4], // 16 bytes
49932        }
49933        let a = Align {
49934            data: [1.0_f32, 2.0, 3.0, 4.0],
49935        };
49936        let p = a.data.as_ptr();
49937        let m = 0b1010;
49938        let r = _mm_maskz_load_ps(m, black_box(p));
49939        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
49940        assert_eq_m128(r, e);
49941    }
49942
49943    #[simd_test(enable = "avx512f,avx512vl")]
49944    unsafe fn test_mm_mask_storeu_ps() {
49945        let mut r = [42_f32; 4];
49946        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
49947        let m = 0b1010;
49948        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
49949        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
49950        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
49951    }
49952
49953    #[simd_test(enable = "avx512f,avx512vl")]
49954    unsafe fn test_mm_mask_store_ps() {
49955        #[repr(align(16))]
49956        struct Align {
49957            data: [f32; 4], // 16 bytes
49958        }
49959        let mut r = Align { data: [42.0; 4] };
49960        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
49961        let m = 0b1010;
49962        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
49963        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
49964        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
49965    }
49966
49967    #[simd_test(enable = "avx512f,avx512vl")]
49968    unsafe fn test_mm_mask_loadu_pd() {
49969        let src = _mm_set1_pd(42.0);
49970        let a = &[1.0_f64, 2.0];
49971        let p = a.as_ptr();
49972        let m = 0b10;
49973        let r = _mm_mask_loadu_pd(src, m, black_box(p));
49974        let e = _mm_setr_pd(42.0, 2.0);
49975        assert_eq_m128d(r, e);
49976    }
49977
49978    #[simd_test(enable = "avx512f,avx512vl")]
49979    unsafe fn test_mm_maskz_loadu_pd() {
49980        let a = &[1.0_f64, 2.0];
49981        let p = a.as_ptr();
49982        let m = 0b10;
49983        let r = _mm_maskz_loadu_pd(m, black_box(p));
49984        let e = _mm_setr_pd(0.0, 2.0);
49985        assert_eq_m128d(r, e);
49986    }
49987
49988    #[simd_test(enable = "avx512f,avx512vl")]
49989    unsafe fn test_mm_mask_load_pd() {
49990        #[repr(align(16))]
49991        struct Align {
49992            data: [f64; 2], // 16 bytes
49993        }
49994        let src = _mm_set1_pd(42.0);
49995        let a = Align {
49996            data: [1.0_f64, 2.0],
49997        };
49998        let p = a.data.as_ptr();
49999        let m = 0b10;
50000        let r = _mm_mask_load_pd(src, m, black_box(p));
50001        let e = _mm_setr_pd(42.0, 2.0);
50002        assert_eq_m128d(r, e);
50003    }
50004
50005    #[simd_test(enable = "avx512f,avx512vl")]
50006    unsafe fn test_mm_maskz_load_pd() {
50007        #[repr(align(16))]
50008        struct Align {
50009            data: [f64; 2], // 16 bytes
50010        }
50011        let a = Align {
50012            data: [1.0_f64, 2.0],
50013        };
50014        let p = a.data.as_ptr();
50015        let m = 0b10;
50016        let r = _mm_maskz_load_pd(m, black_box(p));
50017        let e = _mm_setr_pd(0.0, 2.0);
50018        assert_eq_m128d(r, e);
50019    }
50020
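    // Scalar masked loads: when mask bit 0 is set the low element is read from memory,
    // otherwise it comes from `src` (or is zeroed by the maskz variant); the upper lanes
    // of the result are zeroed either way.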
50021    #[simd_test(enable = "avx512f")]
50022    unsafe fn test_mm_mask_load_ss() {
50023        #[repr(align(16))]
50024        struct Align {
50025            data: f32,
50026        }
50027        let src = _mm_set_ss(2.0);
50028        let mem = Align { data: 1.0 };
50029        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
50030        assert_eq_m128(r, _mm_set_ss(1.0));
50031        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
50032        assert_eq_m128(r, _mm_set_ss(2.0));
50033    }
50034
50035    #[simd_test(enable = "avx512f")]
50036    unsafe fn test_mm_maskz_load_ss() {
50037        #[repr(align(16))]
50038        struct Align {
50039            data: f32,
50040        }
50041        let mem = Align { data: 1.0 };
50042        let r = _mm_maskz_load_ss(0b1, &mem.data);
50043        assert_eq_m128(r, _mm_set_ss(1.0));
50044        let r = _mm_maskz_load_ss(0b0, &mem.data);
50045        assert_eq_m128(r, _mm_set_ss(0.0));
50046    }
50047
50048    #[simd_test(enable = "avx512f")]
50049    unsafe fn test_mm_mask_load_sd() {
50050        #[repr(align(16))]
50051        struct Align {
50052            data: f64,
50053        }
50054        let src = _mm_set_sd(2.0);
50055        let mem = Align { data: 1.0 };
50056        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
50057        assert_eq_m128d(r, _mm_set_sd(1.0));
50058        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
50059        assert_eq_m128d(r, _mm_set_sd(2.0));
50060    }
50061
50062    #[simd_test(enable = "avx512f")]
50063    unsafe fn test_mm_maskz_load_sd() {
50064        #[repr(align(16))]
50065        struct Align {
50066            data: f64,
50067        }
50068        let mem = Align { data: 1.0 };
50069        let r = _mm_maskz_load_sd(0b1, &mem.data);
50070        assert_eq_m128d(r, _mm_set_sd(1.0));
50071        let r = _mm_maskz_load_sd(0b0, &mem.data);
50072        assert_eq_m128d(r, _mm_set_sd(0.0));
50073    }
50074
50075    #[simd_test(enable = "avx512f,avx512vl")]
50076    unsafe fn test_mm_mask_storeu_pd() {
50077        let mut r = [42_f64; 2];
50078        let a = _mm_setr_pd(1.0, 2.0);
50079        let m = 0b10;
50080        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
50081        let e = _mm_setr_pd(42.0, 2.0);
50082        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
50083    }
50084
50085    #[simd_test(enable = "avx512f,avx512vl")]
50086    unsafe fn test_mm_mask_store_pd() {
50087        #[repr(align(16))]
50088        struct Align {
50089            data: [f64; 2], // 16 bytes
50090        }
50091        let mut r = Align { data: [42.0; 2] };
50092        let a = _mm_setr_pd(1.0, 2.0);
50093        let m = 0b10;
50094        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
50095        let e = _mm_setr_pd(42.0, 2.0);
50096        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
50097    }
50098
50099    #[simd_test(enable = "avx512f")]
50100    unsafe fn test_mm_mask_store_ss() {
50101        #[repr(align(16))]
50102        struct Align {
50103            data: f32,
50104        }
50105        let a = _mm_set_ss(2.0);
50106        let mut mem = Align { data: 1.0 };
50107        _mm_mask_store_ss(&mut mem.data, 0b1, a);
50108        assert_eq!(mem.data, 2.0);
50109        _mm_mask_store_ss(&mut mem.data, 0b0, a);
50110        assert_eq!(mem.data, 2.0);
50111    }
50112
50113    #[simd_test(enable = "avx512f")]
50114    unsafe fn test_mm_mask_store_sd() {
50115        #[repr(align(16))]
50116        struct Align {
50117            data: f64,
50118        }
50119        let a = _mm_set_sd(2.0);
50120        let mut mem = Align { data: 1.0 };
50121        _mm_mask_store_sd(&mut mem.data, 0b1, a);
50122        assert_eq!(mem.data, 2.0);
50123        _mm_mask_store_sd(&mut mem.data, 0b0, a);
50124        assert_eq!(mem.data, 2.0);
50125    }
50126
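    // `_mm512_set_pd` takes its arguments from the highest lane down to the lowest, while
    // `_mm512_setr_pd` takes them lowest lane first; the two tests below check that the
    // argument orders mirror each other.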
50127    #[simd_test(enable = "avx512f")]
50128    unsafe fn test_mm512_setr_pd() {
50129        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
50130        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
50131    }
50132
50133    #[simd_test(enable = "avx512f")]
50134    unsafe fn test_mm512_set_pd() {
50135        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
50136        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
50137    }
50138
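    // Rotate-left tests: each 32-bit lane is rotated left by the immediate, so the bit
    // shifted out of position 31 wraps around to bit 0 (e.g. `1 << 31` becomes `1 << 0`).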
50139    #[simd_test(enable = "avx512f")]
50140    unsafe fn test_mm512_rol_epi32() {
50141        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50142        let r = _mm512_rol_epi32::<1>(a);
50143        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50144        assert_eq_m512i(r, e);
50145    }
50146
50147    #[simd_test(enable = "avx512f")]
50148    unsafe fn test_mm512_mask_rol_epi32() {
50149        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50150        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
50151        assert_eq_m512i(r, a);
50152        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
50153        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50154        assert_eq_m512i(r, e);
50155    }
50156
50157    #[simd_test(enable = "avx512f")]
50158    unsafe fn test_mm512_maskz_rol_epi32() {
50159        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50160        let r = _mm512_maskz_rol_epi32::<1>(0, a);
50161        assert_eq_m512i(r, _mm512_setzero_si512());
50162        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
50163        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50164        assert_eq_m512i(r, e);
50165    }
50166
50167    #[simd_test(enable = "avx512f,avx512vl")]
50168    unsafe fn test_mm256_rol_epi32() {
50169        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50170        let r = _mm256_rol_epi32::<1>(a);
50171        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50172        assert_eq_m256i(r, e);
50173    }
50174
50175    #[simd_test(enable = "avx512f,avx512vl")]
50176    unsafe fn test_mm256_mask_rol_epi32() {
50177        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50178        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
50179        assert_eq_m256i(r, a);
50180        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
50181        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50182        assert_eq_m256i(r, e);
50183    }
50184
50185    #[simd_test(enable = "avx512f,avx512vl")]
50186    unsafe fn test_mm256_maskz_rol_epi32() {
50187        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50188        let r = _mm256_maskz_rol_epi32::<1>(0, a);
50189        assert_eq_m256i(r, _mm256_setzero_si256());
50190        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
50191        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50192        assert_eq_m256i(r, e);
50193    }
50194
50195    #[simd_test(enable = "avx512f,avx512vl")]
50196    unsafe fn test_mm_rol_epi32() {
50197        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50198        let r = _mm_rol_epi32::<1>(a);
50199        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50200        assert_eq_m128i(r, e);
50201    }
50202
50203    #[simd_test(enable = "avx512f,avx512vl")]
50204    unsafe fn test_mm_mask_rol_epi32() {
50205        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50206        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
50207        assert_eq_m128i(r, a);
50208        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
50209        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50210        assert_eq_m128i(r, e);
50211    }
50212
50213    #[simd_test(enable = "avx512f,avx512vl")]
50214    unsafe fn test_mm_maskz_rol_epi32() {
50215        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50216        let r = _mm_maskz_rol_epi32::<1>(0, a);
50217        assert_eq_m128i(r, _mm_setzero_si128());
50218        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
50219        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50220        assert_eq_m128i(r, e);
50221    }
50222
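    // Rotate-right tests: the bit shifted out of position 0 wraps around to bit 31
    // (e.g. `1 << 0` becomes `1 << 31`).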
50223    #[simd_test(enable = "avx512f")]
50224    unsafe fn test_mm512_ror_epi32() {
50225        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50226        let r = _mm512_ror_epi32::<1>(a);
50227        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50228        assert_eq_m512i(r, e);
50229    }
50230
50231    #[simd_test(enable = "avx512f")]
50232    unsafe fn test_mm512_mask_ror_epi32() {
50233        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50234        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
50235        assert_eq_m512i(r, a);
50236        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
50237        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50238        assert_eq_m512i(r, e);
50239    }
50240
50241    #[simd_test(enable = "avx512f")]
50242    unsafe fn test_mm512_maskz_ror_epi32() {
50243        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50244        let r = _mm512_maskz_ror_epi32::<1>(0, a);
50245        assert_eq_m512i(r, _mm512_setzero_si512());
50246        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
50247        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50248        assert_eq_m512i(r, e);
50249    }
50250
50251    #[simd_test(enable = "avx512f,avx512vl")]
50252    unsafe fn test_mm256_ror_epi32() {
50253        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50254        let r = _mm256_ror_epi32::<1>(a);
50255        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50256        assert_eq_m256i(r, e);
50257    }
50258
50259    #[simd_test(enable = "avx512f,avx512vl")]
50260    unsafe fn test_mm256_mask_ror_epi32() {
50261        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50262        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
50263        assert_eq_m256i(r, a);
50264        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
50265        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50266        assert_eq_m256i(r, e);
50267    }
50268
50269    #[simd_test(enable = "avx512f,avx512vl")]
50270    unsafe fn test_mm256_maskz_ror_epi32() {
50271        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50272        let r = _mm256_maskz_ror_epi32::<1>(0, a);
50273        assert_eq_m256i(r, _mm256_setzero_si256());
50274        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
50275        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50276        assert_eq_m256i(r, e);
50277    }
50278
50279    #[simd_test(enable = "avx512f,avx512vl")]
50280    unsafe fn test_mm_ror_epi32() {
50281        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50282        let r = _mm_ror_epi32::<1>(a);
50283        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50284        assert_eq_m128i(r, e);
50285    }
50286
50287    #[simd_test(enable = "avx512f,avx512vl")]
50288    unsafe fn test_mm_mask_ror_epi32() {
50289        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50290        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
50291        assert_eq_m128i(r, a);
50292        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
50293        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50294        assert_eq_m128i(r, e);
50295    }
50296
50297    #[simd_test(enable = "avx512f,avx512vl")]
50298    unsafe fn test_mm_maskz_ror_epi32() {
50299        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50300        let r = _mm_maskz_ror_epi32::<1>(0, a);
50301        assert_eq_m128i(r, _mm_setzero_si128());
50302        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
50303        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50304        assert_eq_m128i(r, e);
50305    }
50306
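    // Logical shift-left by immediate: bits shifted past position 31 are discarded, so
    // `1 << 31` becomes 0 rather than wrapping as in the rotate tests above.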
50307    #[simd_test(enable = "avx512f")]
50308    unsafe fn test_mm512_slli_epi32() {
50309        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50310        let r = _mm512_slli_epi32::<1>(a);
50311        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50312        assert_eq_m512i(r, e);
50313    }
50314
50315    #[simd_test(enable = "avx512f")]
50316    unsafe fn test_mm512_mask_slli_epi32() {
50317        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50318        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
50319        assert_eq_m512i(r, a);
50320        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
50321        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50322        assert_eq_m512i(r, e);
50323    }
50324
50325    #[simd_test(enable = "avx512f")]
50326    unsafe fn test_mm512_maskz_slli_epi32() {
50327        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50328        let r = _mm512_maskz_slli_epi32::<1>(0, a);
50329        assert_eq_m512i(r, _mm512_setzero_si512());
50330        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
50331        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
50332        assert_eq_m512i(r, e);
50333    }
50334
50335    #[simd_test(enable = "avx512f,avx512vl")]
50336    unsafe fn test_mm256_mask_slli_epi32() {
50337        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50338        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
50339        assert_eq_m256i(r, a);
50340        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
50341        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50342        assert_eq_m256i(r, e);
50343    }
50344
50345    #[simd_test(enable = "avx512f,avx512vl")]
50346    unsafe fn test_mm256_maskz_slli_epi32() {
50347        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50348        let r = _mm256_maskz_slli_epi32::<1>(0, a);
50349        assert_eq_m256i(r, _mm256_setzero_si256());
50350        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
50351        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50352        assert_eq_m256i(r, e);
50353    }
50354
50355    #[simd_test(enable = "avx512f,avx512vl")]
50356    unsafe fn test_mm_mask_slli_epi32() {
50357        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50358        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
50359        assert_eq_m128i(r, a);
50360        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
50361        let e = _mm_set_epi32(0, 2, 2, 2);
50362        assert_eq_m128i(r, e);
50363    }
50364
50365    #[simd_test(enable = "avx512f,avx512vl")]
50366    unsafe fn test_mm_maskz_slli_epi32() {
50367        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50368        let r = _mm_maskz_slli_epi32::<1>(0, a);
50369        assert_eq_m128i(r, _mm_setzero_si128());
50370        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
50371        let e = _mm_set_epi32(0, 2, 2, 2);
50372        assert_eq_m128i(r, e);
50373    }
50374
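    // Logical shift-right by immediate: zeros are shifted in from the top, so each lane
    // holding 2 becomes 1 and a lane that is already 0 stays 0.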
50375    #[simd_test(enable = "avx512f")]
50376    unsafe fn test_mm512_srli_epi32() {
50377        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50378        let r = _mm512_srli_epi32::<1>(a);
50379        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50380        assert_eq_m512i(r, e);
50381    }
50382
50383    #[simd_test(enable = "avx512f")]
50384    unsafe fn test_mm512_mask_srli_epi32() {
50385        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50386        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
50387        assert_eq_m512i(r, a);
50388        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
50389        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50390        assert_eq_m512i(r, e);
50391    }
50392
50393    #[simd_test(enable = "avx512f")]
50394    unsafe fn test_mm512_maskz_srli_epi32() {
50395        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
50396        let r = _mm512_maskz_srli_epi32::<1>(0, a);
50397        assert_eq_m512i(r, _mm512_setzero_si512());
50398        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
50399        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
50400        assert_eq_m512i(r, e);
50401    }
50402
50403    #[simd_test(enable = "avx512f,avx512vl")]
50404    unsafe fn test_mm256_mask_srli_epi32() {
50405        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50406        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
50407        assert_eq_m256i(r, a);
50408        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
50409        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50410        assert_eq_m256i(r, e);
50411    }
50412
50413    #[simd_test(enable = "avx512f,avx512vl")]
50414    unsafe fn test_mm256_maskz_srli_epi32() {
50415        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50416        let r = _mm256_maskz_srli_epi32::<1>(0, a);
50417        assert_eq_m256i(r, _mm256_setzero_si256());
50418        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
50419        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50420        assert_eq_m256i(r, e);
50421    }
50422
50423    #[simd_test(enable = "avx512f,avx512vl")]
50424    unsafe fn test_mm_mask_srli_epi32() {
50425        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50426        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
50427        assert_eq_m128i(r, a);
50428        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
50429        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50430        assert_eq_m128i(r, e);
50431    }
50432
50433    #[simd_test(enable = "avx512f,avx512vl")]
50434    unsafe fn test_mm_maskz_srli_epi32() {
50435        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50436        let r = _mm_maskz_srli_epi32::<1>(0, a);
50437        assert_eq_m128i(r, _mm_setzero_si128());
50438        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
50439        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50440        assert_eq_m128i(r, e);
50441    }
50442
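    // Variable rotate-left: unlike `rol`, each lane is rotated by the count held in the
    // corresponding lane of `b` rather than by a single immediate.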
50443    #[simd_test(enable = "avx512f")]
50444    unsafe fn test_mm512_rolv_epi32() {
50445        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50446        let b = _mm512_set1_epi32(1);
50447        let r = _mm512_rolv_epi32(a, b);
50448        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50449        assert_eq_m512i(r, e);
50450    }
50451
50452    #[simd_test(enable = "avx512f")]
50453    unsafe fn test_mm512_mask_rolv_epi32() {
50454        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50455        let b = _mm512_set1_epi32(1);
50456        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
50457        assert_eq_m512i(r, a);
50458        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
50459        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50460        assert_eq_m512i(r, e);
50461    }
50462
50463    #[simd_test(enable = "avx512f")]
50464    unsafe fn test_mm512_maskz_rolv_epi32() {
50465        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50466        let b = _mm512_set1_epi32(1);
50467        let r = _mm512_maskz_rolv_epi32(0, a, b);
50468        assert_eq_m512i(r, _mm512_setzero_si512());
50469        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
50470        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50471        assert_eq_m512i(r, e);
50472    }
50473
50474    #[simd_test(enable = "avx512f,avx512vl")]
50475    unsafe fn test_mm256_rolv_epi32() {
50476        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50477        let b = _mm256_set1_epi32(1);
50478        let r = _mm256_rolv_epi32(a, b);
50479        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50480        assert_eq_m256i(r, e);
50481    }
50482
50483    #[simd_test(enable = "avx512f,avx512vl")]
50484    unsafe fn test_mm256_mask_rolv_epi32() {
50485        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50486        let b = _mm256_set1_epi32(1);
50487        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
50488        assert_eq_m256i(r, a);
50489        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
50490        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50491        assert_eq_m256i(r, e);
50492    }
50493
50494    #[simd_test(enable = "avx512f,avx512vl")]
50495    unsafe fn test_mm256_maskz_rolv_epi32() {
50496        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50497        let b = _mm256_set1_epi32(1);
50498        let r = _mm256_maskz_rolv_epi32(0, a, b);
50499        assert_eq_m256i(r, _mm256_setzero_si256());
50500        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
50501        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50502        assert_eq_m256i(r, e);
50503    }
50504
50505    #[simd_test(enable = "avx512f,avx512vl")]
50506    unsafe fn test_mm_rolv_epi32() {
50507        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50508        let b = _mm_set1_epi32(1);
50509        let r = _mm_rolv_epi32(a, b);
50510        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50511        assert_eq_m128i(r, e);
50512    }
50513
50514    #[simd_test(enable = "avx512f,avx512vl")]
50515    unsafe fn test_mm_mask_rolv_epi32() {
50516        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50517        let b = _mm_set1_epi32(1);
50518        let r = _mm_mask_rolv_epi32(a, 0, a, b);
50519        assert_eq_m128i(r, a);
50520        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
50521        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50522        assert_eq_m128i(r, e);
50523    }
50524
50525    #[simd_test(enable = "avx512f,avx512vl")]
50526    unsafe fn test_mm_maskz_rolv_epi32() {
50527        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50528        let b = _mm_set1_epi32(1);
50529        let r = _mm_maskz_rolv_epi32(0, a, b);
50530        assert_eq_m128i(r, _mm_setzero_si128());
50531        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
50532        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
50533        assert_eq_m128i(r, e);
50534    }
50535
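    // Variable rotate-right: per-lane counts come from `b`, mirroring the `rolv` tests.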
50536    #[simd_test(enable = "avx512f")]
50537    unsafe fn test_mm512_rorv_epi32() {
50538        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50539        let b = _mm512_set1_epi32(1);
50540        let r = _mm512_rorv_epi32(a, b);
50541        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50542        assert_eq_m512i(r, e);
50543    }
50544
50545    #[simd_test(enable = "avx512f")]
50546    unsafe fn test_mm512_mask_rorv_epi32() {
50547        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50548        let b = _mm512_set1_epi32(1);
50549        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
50550        assert_eq_m512i(r, a);
50551        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
50552        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50553        assert_eq_m512i(r, e);
50554    }
50555
50556    #[simd_test(enable = "avx512f")]
50557    unsafe fn test_mm512_maskz_rorv_epi32() {
50558        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
50559        let b = _mm512_set1_epi32(1);
50560        let r = _mm512_maskz_rorv_epi32(0, a, b);
50561        assert_eq_m512i(r, _mm512_setzero_si512());
50562        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
50563        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50564        assert_eq_m512i(r, e);
50565    }
50566
50567    #[simd_test(enable = "avx512f,avx512vl")]
50568    unsafe fn test_mm256_rorv_epi32() {
50569        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50570        let b = _mm256_set1_epi32(1);
50571        let r = _mm256_rorv_epi32(a, b);
50572        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50573        assert_eq_m256i(r, e);
50574    }
50575
50576    #[simd_test(enable = "avx512f,avx512vl")]
50577    unsafe fn test_mm256_mask_rorv_epi32() {
50578        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50579        let b = _mm256_set1_epi32(1);
50580        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
50581        assert_eq_m256i(r, a);
50582        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
50583        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50584        assert_eq_m256i(r, e);
50585    }
50586
50587    #[simd_test(enable = "avx512f,avx512vl")]
50588    unsafe fn test_mm256_maskz_rorv_epi32() {
50589        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
50590        let b = _mm256_set1_epi32(1);
50591        let r = _mm256_maskz_rorv_epi32(0, a, b);
50592        assert_eq_m256i(r, _mm256_setzero_si256());
50593        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
50594        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50595        assert_eq_m256i(r, e);
50596    }
50597
50598    #[simd_test(enable = "avx512f,avx512vl")]
50599    unsafe fn test_mm_rorv_epi32() {
50600        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50601        let b = _mm_set1_epi32(1);
50602        let r = _mm_rorv_epi32(a, b);
50603        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50604        assert_eq_m128i(r, e);
50605    }
50606
50607    #[simd_test(enable = "avx512f,avx512vl")]
50608    unsafe fn test_mm_mask_rorv_epi32() {
50609        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50610        let b = _mm_set1_epi32(1);
50611        let r = _mm_mask_rorv_epi32(a, 0, a, b);
50612        assert_eq_m128i(r, a);
50613        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
50614        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50615        assert_eq_m128i(r, e);
50616    }
50617
50618    #[simd_test(enable = "avx512f,avx512vl")]
50619    unsafe fn test_mm_maskz_rorv_epi32() {
50620        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
50621        let b = _mm_set1_epi32(1);
50622        let r = _mm_maskz_rorv_epi32(0, a, b);
50623        assert_eq_m128i(r, _mm_setzero_si128());
50624        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
50625        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
50626        assert_eq_m128i(r, e);
50627    }
50628
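    // Variable logical shift-left: each lane shifts by the count in the matching lane of
    // `count`, and bits shifted past position 31 are lost.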
50629    #[simd_test(enable = "avx512f")]
50630    unsafe fn test_mm512_sllv_epi32() {
50631        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50632        let count = _mm512_set1_epi32(1);
50633        let r = _mm512_sllv_epi32(a, count);
50634        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50635        assert_eq_m512i(r, e);
50636    }
50637
50638    #[simd_test(enable = "avx512f")]
50639    unsafe fn test_mm512_mask_sllv_epi32() {
50640        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50641        let count = _mm512_set1_epi32(1);
50642        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
50643        assert_eq_m512i(r, a);
50644        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
50645        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50646        assert_eq_m512i(r, e);
50647    }
50648
50649    #[simd_test(enable = "avx512f")]
50650    unsafe fn test_mm512_maskz_sllv_epi32() {
50651        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
50652        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50653        let r = _mm512_maskz_sllv_epi32(0, a, count);
50654        assert_eq_m512i(r, _mm512_setzero_si512());
50655        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
50656        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
50657        assert_eq_m512i(r, e);
50658    }
50659
50660    #[simd_test(enable = "avx512f,avx512vl")]
50661    unsafe fn test_mm256_mask_sllv_epi32() {
50662        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50663        let count = _mm256_set1_epi32(1);
50664        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
50665        assert_eq_m256i(r, a);
50666        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
50667        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50668        assert_eq_m256i(r, e);
50669    }
50670
50671    #[simd_test(enable = "avx512f,avx512vl")]
50672    unsafe fn test_mm256_maskz_sllv_epi32() {
50673        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
50674        let count = _mm256_set1_epi32(1);
50675        let r = _mm256_maskz_sllv_epi32(0, a, count);
50676        assert_eq_m256i(r, _mm256_setzero_si256());
50677        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
50678        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
50679        assert_eq_m256i(r, e);
50680    }
50681
50682    #[simd_test(enable = "avx512f,avx512vl")]
50683    unsafe fn test_mm_mask_sllv_epi32() {
50684        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50685        let count = _mm_set1_epi32(1);
50686        let r = _mm_mask_sllv_epi32(a, 0, a, count);
50687        assert_eq_m128i(r, a);
50688        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
50689        let e = _mm_set_epi32(0, 2, 2, 2);
50690        assert_eq_m128i(r, e);
50691    }
50692
50693    #[simd_test(enable = "avx512f,avx512vl")]
50694    unsafe fn test_mm_maskz_sllv_epi32() {
50695        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
50696        let count = _mm_set1_epi32(1);
50697        let r = _mm_maskz_sllv_epi32(0, a, count);
50698        assert_eq_m128i(r, _mm_setzero_si128());
50699        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
50700        let e = _mm_set_epi32(0, 2, 2, 2);
50701        assert_eq_m128i(r, e);
50702    }
50703
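    // Variable logical shift-right: per-lane counts, with zeros shifted in from the top.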
50704    #[simd_test(enable = "avx512f")]
50705    unsafe fn test_mm512_srlv_epi32() {
50706        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50707        let count = _mm512_set1_epi32(1);
50708        let r = _mm512_srlv_epi32(a, count);
50709        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50710        assert_eq_m512i(r, e);
50711    }
50712
50713    #[simd_test(enable = "avx512f")]
50714    unsafe fn test_mm512_mask_srlv_epi32() {
50715        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50716        let count = _mm512_set1_epi32(1);
50717        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
50718        assert_eq_m512i(r, a);
50719        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
50720        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50721        assert_eq_m512i(r, e);
50722    }
50723
50724    #[simd_test(enable = "avx512f")]
50725    unsafe fn test_mm512_maskz_srlv_epi32() {
50726        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
50727        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
50728        let r = _mm512_maskz_srlv_epi32(0, a, count);
50729        assert_eq_m512i(r, _mm512_setzero_si512());
50730        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
50731        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
50732        assert_eq_m512i(r, e);
50733    }
50734
50735    #[simd_test(enable = "avx512f,avx512vl")]
50736    unsafe fn test_mm256_mask_srlv_epi32() {
50737        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50738        let count = _mm256_set1_epi32(1);
50739        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
50740        assert_eq_m256i(r, a);
50741        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
50742        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50743        assert_eq_m256i(r, e);
50744    }
50745
50746    #[simd_test(enable = "avx512f,avx512vl")]
50747    unsafe fn test_mm256_maskz_srlv_epi32() {
50748        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50749        let count = _mm256_set1_epi32(1);
50750        let r = _mm256_maskz_srlv_epi32(0, a, count);
50751        assert_eq_m256i(r, _mm256_setzero_si256());
50752        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
50753        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50754        assert_eq_m256i(r, e);
50755    }
50756
50757    #[simd_test(enable = "avx512f,avx512vl")]
50758    unsafe fn test_mm_mask_srlv_epi32() {
50759        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50760        let count = _mm_set1_epi32(1);
50761        let r = _mm_mask_srlv_epi32(a, 0, a, count);
50762        assert_eq_m128i(r, a);
50763        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
50764        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50765        assert_eq_m128i(r, e);
50766    }
50767
50768    #[simd_test(enable = "avx512f,avx512vl")]
50769    unsafe fn test_mm_maskz_srlv_epi32() {
50770        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50771        let count = _mm_set1_epi32(1);
50772        let r = _mm_maskz_srlv_epi32(0, a, count);
50773        assert_eq_m128i(r, _mm_setzero_si128());
50774        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
50775        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50776        assert_eq_m128i(r, e);
50777    }
50778
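    // `sll_epi32` shifts every lane by the same amount, taken from the low 64 bits of the
    // `count` vector (here `_mm_set_epi32(0, 0, 0, 2)` encodes a shift of 2).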
50779    #[simd_test(enable = "avx512f")]
50780    unsafe fn test_mm512_sll_epi32() {
50781        #[rustfmt::skip]
50782        let a = _mm512_set_epi32(
50783            1 << 31, 1 << 0, 1 << 1, 1 << 2,
50784            0, 0, 0, 0,
50785            0, 0, 0, 0,
50786            0, 0, 0, 0,
50787        );
50788        let count = _mm_set_epi32(0, 0, 0, 2);
50789        let r = _mm512_sll_epi32(a, count);
50790        #[rustfmt::skip]
50791        let e = _mm512_set_epi32(
50792            0, 1 << 2, 1 << 3, 1 << 4,
50793            0, 0, 0, 0,
50794            0, 0, 0, 0,
50795            0, 0, 0, 0,
50796        );
50797        assert_eq_m512i(r, e);
50798    }
50799
50800    #[simd_test(enable = "avx512f")]
50801    unsafe fn test_mm512_mask_sll_epi32() {
50802        #[rustfmt::skip]
50803        let a = _mm512_set_epi32(
50804            1 << 31, 1 << 0, 1 << 1, 1 << 2,
50805            0, 0, 0, 0,
50806            0, 0, 0, 0,
50807            0, 0, 0, 0,
50808        );
50809        let count = _mm_set_epi32(0, 0, 0, 2);
50810        let r = _mm512_mask_sll_epi32(a, 0, a, count);
50811        assert_eq_m512i(r, a);
50812        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
50813        #[rustfmt::skip]
50814        let e = _mm512_set_epi32(
50815            0, 1 << 2, 1 << 3, 1 << 4,
50816            0, 0, 0, 0,
50817            0, 0, 0, 0,
50818            0, 0, 0, 0,
50819        );
50820        assert_eq_m512i(r, e);
50821    }
50822
50823    #[simd_test(enable = "avx512f")]
50824    unsafe fn test_mm512_maskz_sll_epi32() {
50825        #[rustfmt::skip]
50826        let a = _mm512_set_epi32(
50827            1 << 31, 1 << 0, 1 << 1, 1 << 2,
50828            0, 0, 0, 0,
50829            0, 0, 0, 0,
50830            0, 0, 0, 1 << 31,
50831        );
50832        let count = _mm_set_epi32(2, 0, 0, 2);
50833        let r = _mm512_maskz_sll_epi32(0, a, count);
50834        assert_eq_m512i(r, _mm512_setzero_si512());
50835        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
50836        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
50837        assert_eq_m512i(r, e);
50838    }
50839
50840    #[simd_test(enable = "avx512f,avx512vl")]
50841    unsafe fn test_mm256_mask_sll_epi32() {
50842        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
50843        let count = _mm_set_epi32(0, 0, 0, 1);
50844        let r = _mm256_mask_sll_epi32(a, 0, a, count);
50845        assert_eq_m256i(r, a);
50846        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
50847        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
50848        assert_eq_m256i(r, e);
50849    }
50850
50851    #[simd_test(enable = "avx512f,avx512vl")]
50852    unsafe fn test_mm256_maskz_sll_epi32() {
50853        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
50854        let count = _mm_set_epi32(0, 0, 0, 1);
50855        let r = _mm256_maskz_sll_epi32(0, a, count);
50856        assert_eq_m256i(r, _mm256_setzero_si256());
50857        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
50858        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
50859        assert_eq_m256i(r, e);
50860    }
50861
50862    #[simd_test(enable = "avx512f,avx512vl")]
50863    unsafe fn test_mm_mask_sll_epi32() {
50864        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
50865        let count = _mm_set_epi32(0, 0, 0, 1);
50866        let r = _mm_mask_sll_epi32(a, 0, a, count);
50867        assert_eq_m128i(r, a);
50868        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
50869        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
50870        assert_eq_m128i(r, e);
50871    }
50872
50873    #[simd_test(enable = "avx512f,avx512vl")]
50874    unsafe fn test_mm_maskz_sll_epi32() {
50875        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
50876        let count = _mm_set_epi32(0, 0, 0, 1);
50877        let r = _mm_maskz_sll_epi32(0, a, count);
50878        assert_eq_m128i(r, _mm_setzero_si128());
50879        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
50880        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
50881        assert_eq_m128i(r, e);
50882    }
50883
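    // `srl_epi32` mirrors `sll_epi32` but shifts right, again applying the shift amount
    // in the low 64 bits of `count` to every lane.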
50884    #[simd_test(enable = "avx512f")]
50885    unsafe fn test_mm512_srl_epi32() {
50886        #[rustfmt::skip]
50887        let a = _mm512_set_epi32(
50888            1 << 31, 1 << 0, 1 << 1, 1 << 2,
50889            0, 0, 0, 0,
50890            0, 0, 0, 0,
50891            0, 0, 0, 0,
50892        );
50893        let count = _mm_set_epi32(0, 0, 0, 2);
50894        let r = _mm512_srl_epi32(a, count);
50895        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
50896        assert_eq_m512i(r, e);
50897    }
50898
50899    #[simd_test(enable = "avx512f")]
50900    unsafe fn test_mm512_mask_srl_epi32() {
50901        #[rustfmt::skip]
50902        let a = _mm512_set_epi32(
50903            1 << 31, 1 << 0, 1 << 1, 1 << 2,
50904            0, 0, 0, 0,
50905            0, 0, 0, 0,
50906            0, 0, 0, 0,
50907        );
50908        let count = _mm_set_epi32(0, 0, 0, 2);
50909        let r = _mm512_mask_srl_epi32(a, 0, a, count);
50910        assert_eq_m512i(r, a);
50911        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
50912        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
50913        assert_eq_m512i(r, e);
50914    }
50915
50916    #[simd_test(enable = "avx512f")]
50917    unsafe fn test_mm512_maskz_srl_epi32() {
50918        #[rustfmt::skip]
50919        let a = _mm512_set_epi32(
50920            1 << 31, 1 << 0, 1 << 1, 1 << 2,
50921            0, 0, 0, 0,
50922            0, 0, 0, 0,
50923            0, 0, 0, 1 << 31,
50924        );
50925        let count = _mm_set_epi32(2, 0, 0, 2);
50926        let r = _mm512_maskz_srl_epi32(0, a, count);
50927        assert_eq_m512i(r, _mm512_setzero_si512());
50928        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
50929        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
50930        assert_eq_m512i(r, e);
50931    }
50932
50933    #[simd_test(enable = "avx512f,avx512vl")]
50934    unsafe fn test_mm256_mask_srl_epi32() {
50935        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50936        let count = _mm_set_epi32(0, 0, 0, 1);
50937        let r = _mm256_mask_srl_epi32(a, 0, a, count);
50938        assert_eq_m256i(r, a);
50939        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
50940        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50941        assert_eq_m256i(r, e);
50942    }
50943
50944    #[simd_test(enable = "avx512f,avx512vl")]
50945    unsafe fn test_mm256_maskz_srl_epi32() {
50946        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
50947        let count = _mm_set_epi32(0, 0, 0, 1);
50948        let r = _mm256_maskz_srl_epi32(0, a, count);
50949        assert_eq_m256i(r, _mm256_setzero_si256());
50950        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
50951        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
50952        assert_eq_m256i(r, e);
50953    }
50954
50955    #[simd_test(enable = "avx512f,avx512vl")]
50956    unsafe fn test_mm_mask_srl_epi32() {
50957        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50958        let count = _mm_set_epi32(0, 0, 0, 1);
50959        let r = _mm_mask_srl_epi32(a, 0, a, count);
50960        assert_eq_m128i(r, a);
50961        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
50962        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50963        assert_eq_m128i(r, e);
50964    }
50965
50966    #[simd_test(enable = "avx512f,avx512vl")]
50967    unsafe fn test_mm_maskz_srl_epi32() {
50968        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
50969        let count = _mm_set_epi32(0, 0, 0, 1);
50970        let r = _mm_maskz_srl_epi32(0, a, count);
50971        assert_eq_m128i(r, _mm_setzero_si128());
50972        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
50973        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
50974        assert_eq_m128i(r, e);
50975    }
50976
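    // Arithmetic shift-right: the sign bit is replicated, so negative lanes stay negative
    // and round toward negative infinity (e.g. -15 >> 2 == -4).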
50977    #[simd_test(enable = "avx512f")]
50978    unsafe fn test_mm512_sra_epi32() {
50979        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
50980        let count = _mm_set_epi32(1, 0, 0, 2);
50981        let r = _mm512_sra_epi32(a, count);
50982        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
50983        assert_eq_m512i(r, e);
50984    }
50985
50986    #[simd_test(enable = "avx512f")]
50987    unsafe fn test_mm512_mask_sra_epi32() {
50988        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
50989        let count = _mm_set_epi32(0, 0, 0, 2);
50990        let r = _mm512_mask_sra_epi32(a, 0, a, count);
50991        assert_eq_m512i(r, a);
50992        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
50993        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
50994        assert_eq_m512i(r, e);
50995    }
50996
50997    #[simd_test(enable = "avx512f")]
50998    unsafe fn test_mm512_maskz_sra_epi32() {
50999        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
51000        let count = _mm_set_epi32(2, 0, 0, 2);
51001        let r = _mm512_maskz_sra_epi32(0, a, count);
51002        assert_eq_m512i(r, _mm512_setzero_si512());
51003        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
51004        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
51005        assert_eq_m512i(r, e);
51006    }
51007
51008    #[simd_test(enable = "avx512f,avx512vl")]
51009    unsafe fn test_mm256_mask_sra_epi32() {
51010        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51011        let count = _mm_set_epi32(0, 0, 0, 1);
51012        let r = _mm256_mask_sra_epi32(a, 0, a, count);
51013        assert_eq_m256i(r, a);
51014        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
51015        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51016        assert_eq_m256i(r, e);
51017    }
51018
51019    #[simd_test(enable = "avx512f,avx512vl")]
51020    unsafe fn test_mm256_maskz_sra_epi32() {
51021        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51022        let count = _mm_set_epi32(0, 0, 0, 1);
51023        let r = _mm256_maskz_sra_epi32(0, a, count);
51024        assert_eq_m256i(r, _mm256_setzero_si256());
51025        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
51026        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51027        assert_eq_m256i(r, e);
51028    }
51029
51030    #[simd_test(enable = "avx512f,avx512vl")]
51031    unsafe fn test_mm_mask_sra_epi32() {
51032        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51033        let count = _mm_set_epi32(0, 0, 0, 1);
51034        let r = _mm_mask_sra_epi32(a, 0, a, count);
51035        assert_eq_m128i(r, a);
51036        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
51037        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51038        assert_eq_m128i(r, e);
51039    }
51040
51041    #[simd_test(enable = "avx512f,avx512vl")]
51042    unsafe fn test_mm_maskz_sra_epi32() {
51043        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51044        let count = _mm_set_epi32(0, 0, 0, 1);
51045        let r = _mm_maskz_sra_epi32(0, a, count);
51046        assert_eq_m128i(r, _mm_setzero_si128());
51047        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
51048        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51049        assert_eq_m128i(r, e);
51050    }
51051
51052    #[simd_test(enable = "avx512f")]
51053    unsafe fn test_mm512_srav_epi32() {
51054        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
51055        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
51056        let r = _mm512_srav_epi32(a, count);
51057        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
51058        assert_eq_m512i(r, e);
51059    }
51060
51061    #[simd_test(enable = "avx512f")]
51062    unsafe fn test_mm512_mask_srav_epi32() {
51063        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
51064        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
51065        let r = _mm512_mask_srav_epi32(a, 0, a, count);
51066        assert_eq_m512i(r, a);
51067        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
51068        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
51069        assert_eq_m512i(r, e);
51070    }
51071
51072    #[simd_test(enable = "avx512f")]
51073    unsafe fn test_mm512_maskz_srav_epi32() {
51074        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
51075        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
51076        let r = _mm512_maskz_srav_epi32(0, a, count);
51077        assert_eq_m512i(r, _mm512_setzero_si512());
51078        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
51079        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
51080        assert_eq_m512i(r, e);
51081    }
51082
51083    #[simd_test(enable = "avx512f,avx512vl")]
51084    unsafe fn test_mm256_mask_srav_epi32() {
51085        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51086        let count = _mm256_set1_epi32(1);
51087        let r = _mm256_mask_srav_epi32(a, 0, a, count);
51088        assert_eq_m256i(r, a);
51089        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
51090        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51091        assert_eq_m256i(r, e);
51092    }
51093
51094    #[simd_test(enable = "avx512f,avx512vl")]
51095    unsafe fn test_mm256_maskz_srav_epi32() {
51096        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51097        let count = _mm256_set1_epi32(1);
51098        let r = _mm256_maskz_srav_epi32(0, a, count);
51099        assert_eq_m256i(r, _mm256_setzero_si256());
51100        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
51101        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51102        assert_eq_m256i(r, e);
51103    }
51104
51105    #[simd_test(enable = "avx512f,avx512vl")]
51106    unsafe fn test_mm_mask_srav_epi32() {
51107        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51108        let count = _mm_set1_epi32(1);
51109        let r = _mm_mask_srav_epi32(a, 0, a, count);
51110        assert_eq_m128i(r, a);
51111        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
51112        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51113        assert_eq_m128i(r, e);
51114    }
51115
51116    #[simd_test(enable = "avx512f,avx512vl")]
51117    unsafe fn test_mm_maskz_srav_epi32() {
51118        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51119        let count = _mm_set1_epi32(1);
51120        let r = _mm_maskz_srav_epi32(0, a, count);
51121        assert_eq_m128i(r, _mm_setzero_si128());
51122        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
51123        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51124        assert_eq_m128i(r, e);
51125    }
51126
51127    #[simd_test(enable = "avx512f")]
51128    unsafe fn test_mm512_srai_epi32() {
51129        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
51130        let r = _mm512_srai_epi32::<2>(a);
51131        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
51132        assert_eq_m512i(r, e);
51133    }
51134
51135    #[simd_test(enable = "avx512f")]
51136    unsafe fn test_mm512_mask_srai_epi32() {
51137        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
51138        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
51139        assert_eq_m512i(r, a);
51140        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
51141        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
51142        assert_eq_m512i(r, e);
51143    }
51144
51145    #[simd_test(enable = "avx512f")]
51146    unsafe fn test_mm512_maskz_srai_epi32() {
51147        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
51148        let r = _mm512_maskz_srai_epi32::<2>(0, a);
51149        assert_eq_m512i(r, _mm512_setzero_si512());
51150        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
51151        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
51152        assert_eq_m512i(r, e);
51153    }
51154
51155    #[simd_test(enable = "avx512f,avx512vl")]
51156    unsafe fn test_mm256_mask_srai_epi32() {
51157        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51158        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
51159        assert_eq_m256i(r, a);
51160        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
51161        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51162        assert_eq_m256i(r, e);
51163    }
51164
51165    #[simd_test(enable = "avx512f,avx512vl")]
51166    unsafe fn test_mm256_maskz_srai_epi32() {
51167        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
51168        let r = _mm256_maskz_srai_epi32::<1>(0, a);
51169        assert_eq_m256i(r, _mm256_setzero_si256());
51170        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
51171        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
51172        assert_eq_m256i(r, e);
51173    }
51174
51175    #[simd_test(enable = "avx512f,avx512vl")]
51176    unsafe fn test_mm_mask_srai_epi32() {
51177        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51178        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
51179        assert_eq_m128i(r, a);
51180        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
51181        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51182        assert_eq_m128i(r, e);
51183    }
51184
51185    #[simd_test(enable = "avx512f,avx512vl")]
51186    unsafe fn test_mm_maskz_srai_epi32() {
51187        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
51188        let r = _mm_maskz_srai_epi32::<1>(0, a);
51189        assert_eq_m128i(r, _mm_setzero_si128());
51190        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
51191        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
51192        assert_eq_m128i(r, e);
51193    }
51194
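    // `permute_ps` applies the same 2-bit-per-element control to every 128-bit lane, so the
    // control 0b11_11_11_11 broadcasts element 3 of each lane.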
51195    #[simd_test(enable = "avx512f")]
51196    unsafe fn test_mm512_permute_ps() {
51197        let a = _mm512_setr_ps(
51198            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51199        );
51200        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
51201        let e = _mm512_setr_ps(
51202            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
51203        );
51204        assert_eq_m512(r, e);
51205    }
51206
51207    #[simd_test(enable = "avx512f")]
51208    unsafe fn test_mm512_mask_permute_ps() {
51209        let a = _mm512_setr_ps(
51210            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51211        );
51212        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
51213        assert_eq_m512(r, a);
51214        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
51215        let e = _mm512_setr_ps(
51216            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
51217        );
51218        assert_eq_m512(r, e);
51219    }
51220
51221    #[simd_test(enable = "avx512f")]
51222    unsafe fn test_mm512_maskz_permute_ps() {
51223        let a = _mm512_setr_ps(
51224            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51225        );
51226        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
51227        assert_eq_m512(r, _mm512_setzero_ps());
51228        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
51229        let e = _mm512_setr_ps(
51230            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
51231        );
51232        assert_eq_m512(r, e);
51233    }
51234
51235    #[simd_test(enable = "avx512f,avx512vl")]
51236    unsafe fn test_mm256_mask_permute_ps() {
51237        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51238        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
51239        assert_eq_m256(r, a);
51240        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
51241        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
51242        assert_eq_m256(r, e);
51243    }
51244
51245    #[simd_test(enable = "avx512f,avx512vl")]
51246    unsafe fn test_mm256_maskz_permute_ps() {
51247        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51248        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
51249        assert_eq_m256(r, _mm256_setzero_ps());
51250        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
51251        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
51252        assert_eq_m256(r, e);
51253    }
51254
51255    #[simd_test(enable = "avx512f,avx512vl")]
51256    unsafe fn test_mm_mask_permute_ps() {
51257        let a = _mm_set_ps(0., 1., 2., 3.);
51258        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
51259        assert_eq_m128(r, a);
51260        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
51261        let e = _mm_set_ps(0., 0., 0., 0.);
51262        assert_eq_m128(r, e);
51263    }
51264
51265    #[simd_test(enable = "avx512f,avx512vl")]
51266    unsafe fn test_mm_maskz_permute_ps() {
51267        let a = _mm_set_ps(0., 1., 2., 3.);
51268        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
51269        assert_eq_m128(r, _mm_setzero_ps());
51270        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
51271        let e = _mm_set_ps(0., 0., 0., 0.);
51272        assert_eq_m128(r, e);
51273    }
51274
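    // `_mm512_permutevar_epi32` indexes across the full vector: with every index equal to 1,
    // each destination element becomes element 1 of `a` (14 here).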
51275    #[simd_test(enable = "avx512f")]
51276    unsafe fn test_mm512_permutevar_epi32() {
51277        let idx = _mm512_set1_epi32(1);
51278        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51279        let r = _mm512_permutevar_epi32(idx, a);
51280        let e = _mm512_set1_epi32(14);
51281        assert_eq_m512i(r, e);
51282    }
51283
51284    #[simd_test(enable = "avx512f")]
51285    unsafe fn test_mm512_mask_permutevar_epi32() {
51286        let idx = _mm512_set1_epi32(1);
51287        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51288        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
51289        assert_eq_m512i(r, a);
51290        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
51291        let e = _mm512_set1_epi32(14);
51292        assert_eq_m512i(r, e);
51293    }
51294
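    // `permutevar_ps`, by contrast, indexes within each 128-bit lane using the low two bits of
    // each element of `b`, so index 0b01 selects element 1 of every lane.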
51295    #[simd_test(enable = "avx512f")]
51296    unsafe fn test_mm512_permutevar_ps() {
51297        let a = _mm512_set_ps(
51298            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51299        );
51300        let b = _mm512_set1_epi32(0b01);
51301        let r = _mm512_permutevar_ps(a, b);
51302        let e = _mm512_set_ps(
51303            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
51304        );
51305        assert_eq_m512(r, e);
51306    }
51307
51308    #[simd_test(enable = "avx512f")]
51309    unsafe fn test_mm512_mask_permutevar_ps() {
51310        let a = _mm512_set_ps(
51311            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51312        );
51313        let b = _mm512_set1_epi32(0b01);
51314        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
51315        assert_eq_m512(r, a);
51316        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
51317        let e = _mm512_set_ps(
51318            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
51319        );
51320        assert_eq_m512(r, e);
51321    }
51322
51323    #[simd_test(enable = "avx512f")]
51324    unsafe fn test_mm512_maskz_permutevar_ps() {
51325        let a = _mm512_set_ps(
51326            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51327        );
51328        let b = _mm512_set1_epi32(0b01);
51329        let r = _mm512_maskz_permutevar_ps(0, a, b);
51330        assert_eq_m512(r, _mm512_setzero_ps());
51331        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
51332        let e = _mm512_set_ps(
51333            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
51334        );
51335        assert_eq_m512(r, e);
51336    }
51337
51338    #[simd_test(enable = "avx512f,avx512vl")]
51339    unsafe fn test_mm256_mask_permutevar_ps() {
51340        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51341        let b = _mm256_set1_epi32(0b01);
51342        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
51343        assert_eq_m256(r, a);
51344        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
51345        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
51346        assert_eq_m256(r, e);
51347    }
51348
51349    #[simd_test(enable = "avx512f,avx512vl")]
51350    unsafe fn test_mm256_maskz_permutevar_ps() {
51351        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51352        let b = _mm256_set1_epi32(0b01);
51353        let r = _mm256_maskz_permutevar_ps(0, a, b);
51354        assert_eq_m256(r, _mm256_setzero_ps());
51355        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
51356        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
51357        assert_eq_m256(r, e);
51358    }
51359
51360    #[simd_test(enable = "avx512f,avx512vl")]
51361    unsafe fn test_mm_mask_permutevar_ps() {
51362        let a = _mm_set_ps(0., 1., 2., 3.);
51363        let b = _mm_set1_epi32(0b01);
51364        let r = _mm_mask_permutevar_ps(a, 0, a, b);
51365        assert_eq_m128(r, a);
51366        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
51367        let e = _mm_set_ps(2., 2., 2., 2.);
51368        assert_eq_m128(r, e);
51369    }
51370
51371    #[simd_test(enable = "avx512f,avx512vl")]
51372    unsafe fn test_mm_maskz_permutevar_ps() {
51373        let a = _mm_set_ps(0., 1., 2., 3.);
51374        let b = _mm_set1_epi32(0b01);
51375        let r = _mm_maskz_permutevar_ps(0, a, b);
51376        assert_eq_m128(r, _mm_setzero_ps());
51377        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
51378        let e = _mm_set_ps(2., 2., 2., 2.);
51379        assert_eq_m128(r, e);
51380    }
51381
51382    #[simd_test(enable = "avx512f")]
51383    unsafe fn test_mm512_permutexvar_epi32() {
51384        let idx = _mm512_set1_epi32(1);
51385        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51386        let r = _mm512_permutexvar_epi32(idx, a);
51387        let e = _mm512_set1_epi32(14);
51388        assert_eq_m512i(r, e);
51389    }
51390
51391    #[simd_test(enable = "avx512f")]
51392    unsafe fn test_mm512_mask_permutexvar_epi32() {
51393        let idx = _mm512_set1_epi32(1);
51394        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51395        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
51396        assert_eq_m512i(r, a);
51397        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
51398        let e = _mm512_set1_epi32(14);
51399        assert_eq_m512i(r, e);
51400    }
51401
51402    #[simd_test(enable = "avx512f")]
51403    unsafe fn test_mm512_maskz_permutexvar_epi32() {
51404        let idx = _mm512_set1_epi32(1);
51405        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51406        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
51407        assert_eq_m512i(r, _mm512_setzero_si512());
51408        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
51409        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
51410        assert_eq_m512i(r, e);
51411    }
51412
51413    #[simd_test(enable = "avx512f,avx512vl")]
51414    unsafe fn test_mm256_permutexvar_epi32() {
51415        let idx = _mm256_set1_epi32(1);
51416        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51417        let r = _mm256_permutexvar_epi32(idx, a);
51418        let e = _mm256_set1_epi32(6);
51419        assert_eq_m256i(r, e);
51420    }
51421
51422    #[simd_test(enable = "avx512f,avx512vl")]
51423    unsafe fn test_mm256_mask_permutexvar_epi32() {
51424        let idx = _mm256_set1_epi32(1);
51425        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51426        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
51427        assert_eq_m256i(r, a);
51428        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
51429        let e = _mm256_set1_epi32(6);
51430        assert_eq_m256i(r, e);
51431    }
51432
51433    #[simd_test(enable = "avx512f,avx512vl")]
51434    unsafe fn test_mm256_maskz_permutexvar_epi32() {
51435        let idx = _mm256_set1_epi32(1);
51436        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51437        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
51438        assert_eq_m256i(r, _mm256_setzero_si256());
51439        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
51440        let e = _mm256_set1_epi32(6);
51441        assert_eq_m256i(r, e);
51442    }
51443
51444    #[simd_test(enable = "avx512f")]
51445    unsafe fn test_mm512_permutexvar_ps() {
51446        let idx = _mm512_set1_epi32(1);
51447        let a = _mm512_set_ps(
51448            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51449        );
51450        let r = _mm512_permutexvar_ps(idx, a);
51451        let e = _mm512_set1_ps(14.);
51452        assert_eq_m512(r, e);
51453    }
51454
51455    #[simd_test(enable = "avx512f")]
51456    unsafe fn test_mm512_mask_permutexvar_ps() {
51457        let idx = _mm512_set1_epi32(1);
51458        let a = _mm512_set_ps(
51459            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51460        );
51461        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
51462        assert_eq_m512(r, a);
51463        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
51464        let e = _mm512_set1_ps(14.);
51465        assert_eq_m512(r, e);
51466    }
51467
51468    #[simd_test(enable = "avx512f")]
51469    unsafe fn test_mm512_maskz_permutexvar_ps() {
51470        let idx = _mm512_set1_epi32(1);
51471        let a = _mm512_set_ps(
51472            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51473        );
51474        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
51475        assert_eq_m512(r, _mm512_setzero_ps());
51476        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
51477        let e = _mm512_set_ps(
51478            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
51479        );
51480        assert_eq_m512(r, e);
51481    }
51482
51483    #[simd_test(enable = "avx512f,avx512vl")]
51484    unsafe fn test_mm256_permutexvar_ps() {
51485        let idx = _mm256_set1_epi32(1);
51486        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51487        let r = _mm256_permutexvar_ps(idx, a);
51488        let e = _mm256_set1_ps(6.);
51489        assert_eq_m256(r, e);
51490    }
51491
51492    #[simd_test(enable = "avx512f,avx512vl")]
51493    unsafe fn test_mm256_mask_permutexvar_ps() {
51494        let idx = _mm256_set1_epi32(1);
51495        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51496        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
51497        assert_eq_m256(r, a);
51498        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
51499        let e = _mm256_set1_ps(6.);
51500        assert_eq_m256(r, e);
51501    }
51502
51503    #[simd_test(enable = "avx512f,avx512vl")]
51504    unsafe fn test_mm256_maskz_permutexvar_ps() {
51505        let idx = _mm256_set1_epi32(1);
51506        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51507        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
51508        assert_eq_m256(r, _mm256_setzero_ps());
51509        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
51510        let e = _mm256_set1_ps(6.);
51511        assert_eq_m256(r, e);
51512    }
51513
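    // In the 512-bit `permutex2var_epi32`, bits 3:0 of each index select the element and bit 4
    // selects the source table, so `1 << 4` picks element 0 of `b` (100) while small indices
    // pick from `a`.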
51514    #[simd_test(enable = "avx512f")]
51515    unsafe fn test_mm512_permutex2var_epi32() {
51516        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51517        #[rustfmt::skip]
51518        let idx = _mm512_set_epi32(
51519            1, 1 << 4, 2, 1 << 4,
51520            3, 1 << 4, 4, 1 << 4,
51521            5, 1 << 4, 6, 1 << 4,
51522            7, 1 << 4, 8, 1 << 4,
51523        );
51524        let b = _mm512_set1_epi32(100);
51525        let r = _mm512_permutex2var_epi32(a, idx, b);
51526        let e = _mm512_set_epi32(
51527            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
51528        );
51529        assert_eq_m512i(r, e);
51530    }
51531
51532    #[simd_test(enable = "avx512f")]
51533    unsafe fn test_mm512_mask_permutex2var_epi32() {
51534        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51535        #[rustfmt::skip]
51536        let idx = _mm512_set_epi32(
51537            1, 1 << 4, 2, 1 << 4,
51538            3, 1 << 4, 4, 1 << 4,
51539            5, 1 << 4, 6, 1 << 4,
51540            7, 1 << 4, 8, 1 << 4,
51541        );
51542        let b = _mm512_set1_epi32(100);
51543        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
51544        assert_eq_m512i(r, a);
51545        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
51546        let e = _mm512_set_epi32(
51547            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
51548        );
51549        assert_eq_m512i(r, e);
51550    }
51551
51552    #[simd_test(enable = "avx512f")]
51553    unsafe fn test_mm512_maskz_permutex2var_epi32() {
51554        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51555        #[rustfmt::skip]
51556        let idx = _mm512_set_epi32(
51557            1, 1 << 4, 2, 1 << 4,
51558            3, 1 << 4, 4, 1 << 4,
51559            5, 1 << 4, 6, 1 << 4,
51560            7, 1 << 4, 8, 1 << 4,
51561        );
51562        let b = _mm512_set1_epi32(100);
51563        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
51564        assert_eq_m512i(r, _mm512_setzero_si512());
51565        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
51566        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
51567        assert_eq_m512i(r, e);
51568    }
51569
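    // The `mask2` variant differs from `mask`/`maskz`: elements whose mask bit is clear keep the
    // value from `idx`, which is why the out-of-range index values survive in the result.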
51570    #[simd_test(enable = "avx512f")]
51571    unsafe fn test_mm512_mask2_permutex2var_epi32() {
51572        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
51573        #[rustfmt::skip]
51574        let idx = _mm512_set_epi32(
51575            1000, 1 << 4, 2000, 1 << 4,
51576            3000, 1 << 4, 4000, 1 << 4,
51577            5, 1 << 4, 6, 1 << 4,
51578            7, 1 << 4, 8, 1 << 4,
51579        );
51580        let b = _mm512_set1_epi32(100);
51581        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
51582        assert_eq_m512i(r, idx);
51583        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
51584        #[rustfmt::skip]
51585        let e = _mm512_set_epi32(
51586            1000, 1 << 4, 2000, 1 << 4,
51587            3000, 1 << 4, 4000, 1 << 4,
51588            10, 100, 9, 100,
51589            8, 100, 7, 100,
51590        );
51591        assert_eq_m512i(r, e);
51592    }
51593
51594    #[simd_test(enable = "avx512f,avx512vl")]
51595    unsafe fn test_mm256_permutex2var_epi32() {
51596        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51597        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51598        let b = _mm256_set1_epi32(100);
51599        let r = _mm256_permutex2var_epi32(a, idx, b);
51600        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51601        assert_eq_m256i(r, e);
51602    }
51603
51604    #[simd_test(enable = "avx512f,avx512vl")]
51605    unsafe fn test_mm256_mask_permutex2var_epi32() {
51606        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51607        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51608        let b = _mm256_set1_epi32(100);
51609        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
51610        assert_eq_m256i(r, a);
51611        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
51612        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51613        assert_eq_m256i(r, e);
51614    }
51615
51616    #[simd_test(enable = "avx512f,avx512vl")]
51617    unsafe fn test_mm256_maskz_permutex2var_epi32() {
51618        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51619        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51620        let b = _mm256_set1_epi32(100);
51621        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
51622        assert_eq_m256i(r, _mm256_setzero_si256());
51623        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
51624        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51625        assert_eq_m256i(r, e);
51626    }
51627
51628    #[simd_test(enable = "avx512f,avx512vl")]
51629    unsafe fn test_mm256_mask2_permutex2var_epi32() {
51630        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
51631        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51632        let b = _mm256_set1_epi32(100);
51633        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
51634        assert_eq_m256i(r, idx);
51635        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
51636        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
51637        assert_eq_m256i(r, e);
51638    }
51639
51640    #[simd_test(enable = "avx512f,avx512vl")]
51641    unsafe fn test_mm_permutex2var_epi32() {
51642        let a = _mm_set_epi32(0, 1, 2, 3);
51643        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51644        let b = _mm_set1_epi32(100);
51645        let r = _mm_permutex2var_epi32(a, idx, b);
51646        let e = _mm_set_epi32(2, 100, 1, 100);
51647        assert_eq_m128i(r, e);
51648    }
51649
51650    #[simd_test(enable = "avx512f,avx512vl")]
51651    unsafe fn test_mm_mask_permutex2var_epi32() {
51652        let a = _mm_set_epi32(0, 1, 2, 3);
51653        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51654        let b = _mm_set1_epi32(100);
51655        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
51656        assert_eq_m128i(r, a);
51657        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
51658        let e = _mm_set_epi32(2, 100, 1, 100);
51659        assert_eq_m128i(r, e);
51660    }
51661
51662    #[simd_test(enable = "avx512f,avx512vl")]
51663    unsafe fn test_mm_maskz_permutex2var_epi32() {
51664        let a = _mm_set_epi32(0, 1, 2, 3);
51665        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51666        let b = _mm_set1_epi32(100);
51667        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
51668        assert_eq_m128i(r, _mm_setzero_si128());
51669        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
51670        let e = _mm_set_epi32(2, 100, 1, 100);
51671        assert_eq_m128i(r, e);
51672    }
51673
51674    #[simd_test(enable = "avx512f,avx512vl")]
51675    unsafe fn test_mm_mask2_permutex2var_epi32() {
51676        let a = _mm_set_epi32(0, 1, 2, 3);
51677        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51678        let b = _mm_set1_epi32(100);
51679        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
51680        assert_eq_m128i(r, idx);
51681        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
51682        let e = _mm_set_epi32(2, 100, 1, 100);
51683        assert_eq_m128i(r, e);
51684    }
51685
51686    #[simd_test(enable = "avx512f")]
51687    unsafe fn test_mm512_permutex2var_ps() {
51688        let a = _mm512_set_ps(
51689            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51690        );
51691        #[rustfmt::skip]
51692        let idx = _mm512_set_epi32(
51693            1, 1 << 4, 2, 1 << 4,
51694            3, 1 << 4, 4, 1 << 4,
51695            5, 1 << 4, 6, 1 << 4,
51696            7, 1 << 4, 8, 1 << 4,
51697        );
51698        let b = _mm512_set1_ps(100.);
51699        let r = _mm512_permutex2var_ps(a, idx, b);
51700        let e = _mm512_set_ps(
51701            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
51702        );
51703        assert_eq_m512(r, e);
51704    }
51705
51706    #[simd_test(enable = "avx512f")]
51707    unsafe fn test_mm512_mask_permutex2var_ps() {
51708        let a = _mm512_set_ps(
51709            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51710        );
51711        #[rustfmt::skip]
51712        let idx = _mm512_set_epi32(
51713            1, 1 << 4, 2, 1 << 4,
51714            3, 1 << 4, 4, 1 << 4,
51715            5, 1 << 4, 6, 1 << 4,
51716            7, 1 << 4, 8, 1 << 4,
51717        );
51718        let b = _mm512_set1_ps(100.);
51719        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
51720        assert_eq_m512(r, a);
51721        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
51722        let e = _mm512_set_ps(
51723            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
51724        );
51725        assert_eq_m512(r, e);
51726    }
51727
51728    #[simd_test(enable = "avx512f")]
51729    unsafe fn test_mm512_maskz_permutex2var_ps() {
51730        let a = _mm512_set_ps(
51731            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51732        );
51733        #[rustfmt::skip]
51734        let idx = _mm512_set_epi32(
51735            1, 1 << 4, 2, 1 << 4,
51736            3, 1 << 4, 4, 1 << 4,
51737            5, 1 << 4, 6, 1 << 4,
51738            7, 1 << 4, 8, 1 << 4,
51739        );
51740        let b = _mm512_set1_ps(100.);
51741        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
51742        assert_eq_m512(r, _mm512_setzero_ps());
51743        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
51744        let e = _mm512_set_ps(
51745            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
51746        );
51747        assert_eq_m512(r, e);
51748    }
51749
51750    #[simd_test(enable = "avx512f")]
51751    unsafe fn test_mm512_mask2_permutex2var_ps() {
51752        let a = _mm512_set_ps(
51753            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51754        );
51755        #[rustfmt::skip]
51756        let idx = _mm512_set_epi32(
51757            1, 1 << 4, 2, 1 << 4,
51758            3, 1 << 4, 4, 1 << 4,
51759            5, 1 << 4, 6, 1 << 4,
51760            7, 1 << 4, 8, 1 << 4,
51761        );
51762        let b = _mm512_set1_ps(100.);
51763        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
51764        assert_eq_m512(r, _mm512_castsi512_ps(idx));
51765        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
51766        let e = _mm512_set_ps(
51767            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
51768        );
51769        assert_eq_m512(r, e);
51770    }
51771
51772    #[simd_test(enable = "avx512f,avx512vl")]
51773    unsafe fn test_mm256_permutex2var_ps() {
51774        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51775        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51776        let b = _mm256_set1_ps(100.);
51777        let r = _mm256_permutex2var_ps(a, idx, b);
51778        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51779        assert_eq_m256(r, e);
51780    }
51781
51782    #[simd_test(enable = "avx512f,avx512vl")]
51783    unsafe fn test_mm256_mask_permutex2var_ps() {
51784        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51785        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51786        let b = _mm256_set1_ps(100.);
51787        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
51788        assert_eq_m256(r, a);
51789        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
51790        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51791        assert_eq_m256(r, e);
51792    }
51793
51794    #[simd_test(enable = "avx512f,avx512vl")]
51795    unsafe fn test_mm256_maskz_permutex2var_ps() {
51796        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51797        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51798        let b = _mm256_set1_ps(100.);
51799        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
51800        assert_eq_m256(r, _mm256_setzero_ps());
51801        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
51802        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51803        assert_eq_m256(r, e);
51804    }
51805
51806    #[simd_test(enable = "avx512f,avx512vl")]
51807    unsafe fn test_mm256_mask2_permutex2var_ps() {
51808        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
51809        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
51810        let b = _mm256_set1_ps(100.);
51811        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
51812        assert_eq_m256(r, _mm256_castsi256_ps(idx));
51813        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
51814        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
51815        assert_eq_m256(r, e);
51816    }
51817
51818    #[simd_test(enable = "avx512f,avx512vl")]
51819    unsafe fn test_mm_permutex2var_ps() {
51820        let a = _mm_set_ps(0., 1., 2., 3.);
51821        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51822        let b = _mm_set1_ps(100.);
51823        let r = _mm_permutex2var_ps(a, idx, b);
51824        let e = _mm_set_ps(2., 100., 1., 100.);
51825        assert_eq_m128(r, e);
51826    }
51827
51828    #[simd_test(enable = "avx512f,avx512vl")]
51829    unsafe fn test_mm_mask_permutex2var_ps() {
51830        let a = _mm_set_ps(0., 1., 2., 3.);
51831        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51832        let b = _mm_set1_ps(100.);
51833        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
51834        assert_eq_m128(r, a);
51835        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
51836        let e = _mm_set_ps(2., 100., 1., 100.);
51837        assert_eq_m128(r, e);
51838    }
51839
51840    #[simd_test(enable = "avx512f,avx512vl")]
51841    unsafe fn test_mm_maskz_permutex2var_ps() {
51842        let a = _mm_set_ps(0., 1., 2., 3.);
51843        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51844        let b = _mm_set1_ps(100.);
51845        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
51846        assert_eq_m128(r, _mm_setzero_ps());
51847        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
51848        let e = _mm_set_ps(2., 100., 1., 100.);
51849        assert_eq_m128(r, e);
51850    }
51851
51852    #[simd_test(enable = "avx512f,avx512vl")]
51853    unsafe fn test_mm_mask2_permutex2var_ps() {
51854        let a = _mm_set_ps(0., 1., 2., 3.);
51855        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
51856        let b = _mm_set1_ps(100.);
51857        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
51858        assert_eq_m128(r, _mm_castsi128_ps(idx));
51859        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
51860        let e = _mm_set_ps(2., 100., 1., 100.);
51861        assert_eq_m128(r, e);
51862    }
51863
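    // `_MM_PERM_AADD` is the shuffle control 0b00_00_11_11: within each 128-bit lane the result
    // is [a3, a3, a0, a0] in ascending element order.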
51864    #[simd_test(enable = "avx512f")]
51865    unsafe fn test_mm512_shuffle_epi32() {
51866        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
51867        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
51868        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
51869        assert_eq_m512i(r, e);
51870    }
51871
51872    #[simd_test(enable = "avx512f")]
51873    unsafe fn test_mm512_mask_shuffle_epi32() {
51874        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
51875        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
51876        assert_eq_m512i(r, a);
51877        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
51878        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
51879        assert_eq_m512i(r, e);
51880    }
51881
51882    #[simd_test(enable = "avx512f")]
51883    unsafe fn test_mm512_maskz_shuffle_epi32() {
51884        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
51885        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
51886        assert_eq_m512i(r, _mm512_setzero_si512());
51887        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
51888        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
51889        assert_eq_m512i(r, e);
51890    }
51891
51892    #[simd_test(enable = "avx512f,avx512vl")]
51893    unsafe fn test_mm256_mask_shuffle_epi32() {
51894        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
51895        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
51896        assert_eq_m256i(r, a);
51897        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
51898        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
51899        assert_eq_m256i(r, e);
51900    }
51901
51902    #[simd_test(enable = "avx512f,avx512vl")]
51903    unsafe fn test_mm256_maskz_shuffle_epi32() {
51904        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
51905        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
51906        assert_eq_m256i(r, _mm256_setzero_si256());
51907        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
51908        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
51909        assert_eq_m256i(r, e);
51910    }
51911
51912    #[simd_test(enable = "avx512f,avx512vl")]
51913    unsafe fn test_mm_mask_shuffle_epi32() {
51914        let a = _mm_set_epi32(1, 4, 5, 8);
51915        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
51916        assert_eq_m128i(r, a);
51917        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
51918        let e = _mm_set_epi32(8, 8, 1, 1);
51919        assert_eq_m128i(r, e);
51920    }
51921
51922    #[simd_test(enable = "avx512f,avx512vl")]
51923    unsafe fn test_mm_maskz_shuffle_epi32() {
51924        let a = _mm_set_epi32(1, 4, 5, 8);
51925        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
51926        assert_eq_m128i(r, _mm_setzero_si128());
51927        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
51928        let e = _mm_set_epi32(8, 8, 1, 1);
51929        assert_eq_m128i(r, e);
51930    }
51931
51932    #[simd_test(enable = "avx512f")]
51933    unsafe fn test_mm512_shuffle_ps() {
51934        let a = _mm512_setr_ps(
51935            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
51936        );
51937        let b = _mm512_setr_ps(
51938            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
51939        );
51940        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
51941        let e = _mm512_setr_ps(
51942            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
51943        );
51944        assert_eq_m512(r, e);
51945    }
51946
51947    #[simd_test(enable = "avx512f")]
51948    unsafe fn test_mm512_mask_shuffle_ps() {
51949        let a = _mm512_setr_ps(
51950            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
51951        );
51952        let b = _mm512_setr_ps(
51953            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
51954        );
51955        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
51956        assert_eq_m512(r, a);
51957        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
51958        let e = _mm512_setr_ps(
51959            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
51960        );
51961        assert_eq_m512(r, e);
51962    }
51963
51964    #[simd_test(enable = "avx512f")]
51965    unsafe fn test_mm512_maskz_shuffle_ps() {
51966        let a = _mm512_setr_ps(
51967            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
51968        );
51969        let b = _mm512_setr_ps(
51970            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
51971        );
51972        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
51973        assert_eq_m512(r, _mm512_setzero_ps());
51974        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
51975        let e = _mm512_setr_ps(
51976            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
51977        );
51978        assert_eq_m512(r, e);
51979    }
51980
51981    #[simd_test(enable = "avx512f,avx512vl")]
51982    unsafe fn test_mm256_mask_shuffle_ps() {
51983        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
51984        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
51985        let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
51986        assert_eq_m256(r, a);
51987        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
51988        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
51989        assert_eq_m256(r, e);
51990    }
51991
51992    #[simd_test(enable = "avx512f,avx512vl")]
51993    unsafe fn test_mm256_maskz_shuffle_ps() {
51994        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
51995        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
51996        let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
51997        assert_eq_m256(r, _mm256_setzero_ps());
51998        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
51999        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
52000        assert_eq_m256(r, e);
52001    }
52002
52003    #[simd_test(enable = "avx512f,avx512vl")]
52004    unsafe fn test_mm_mask_shuffle_ps() {
52005        let a = _mm_set_ps(1., 4., 5., 8.);
52006        let b = _mm_set_ps(2., 3., 6., 7.);
52007        let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
52008        assert_eq_m128(r, a);
52009        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
52010        let e = _mm_set_ps(7., 7., 1., 1.);
52011        assert_eq_m128(r, e);
52012    }
52013
52014    #[simd_test(enable = "avx512f,avx512vl")]
52015    unsafe fn test_mm_maskz_shuffle_ps() {
52016        let a = _mm_set_ps(1., 4., 5., 8.);
52017        let b = _mm_set_ps(2., 3., 6., 7.);
52018        let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
52019        assert_eq_m128(r, _mm_setzero_ps());
52020        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
52021        let e = _mm_set_ps(7., 7., 1., 1.);
52022        assert_eq_m128(r, e);
52023    }
52024
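    // `shuffle_i32x4` permutes whole 128-bit lanes: with an all-zero control the two low result
    // lanes repeat lane 0 of `a` and the two high lanes repeat lane 0 of `b`.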
52025    #[simd_test(enable = "avx512f")]
52026    unsafe fn test_mm512_shuffle_i32x4() {
52027        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
52028        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
52029        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
52030        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
52031        assert_eq_m512i(r, e);
52032    }
52033
52034    #[simd_test(enable = "avx512f")]
52035    unsafe fn test_mm512_mask_shuffle_i32x4() {
52036        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
52037        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
52038        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
52039        assert_eq_m512i(r, a);
52040        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
52041        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
52042        assert_eq_m512i(r, e);
52043    }
52044
52045    #[simd_test(enable = "avx512f")]
52046    unsafe fn test_mm512_maskz_shuffle_i32x4() {
52047        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
52048        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
52049        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
52050        assert_eq_m512i(r, _mm512_setzero_si512());
52051        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
52052        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
52053        assert_eq_m512i(r, e);
52054    }
52055
52056    #[simd_test(enable = "avx512f,avx512vl")]
52057    unsafe fn test_mm256_shuffle_i32x4() {
52058        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
52059        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
52060        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
52061        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
52062        assert_eq_m256i(r, e);
52063    }
52064
52065    #[simd_test(enable = "avx512f,avx512vl")]
52066    unsafe fn test_mm256_mask_shuffle_i32x4() {
52067        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
52068        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
52069        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
52070        assert_eq_m256i(r, a);
52071        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
52072        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
52073        assert_eq_m256i(r, e);
52074    }
52075
52076    #[simd_test(enable = "avx512f,avx512vl")]
52077    unsafe fn test_mm256_maskz_shuffle_i32x4() {
52078        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
52079        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
52080        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
52081        assert_eq_m256i(r, _mm256_setzero_si256());
52082        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
52083        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
52084        assert_eq_m256i(r, e);
52085    }
52086
52087    #[simd_test(enable = "avx512f")]
52088    unsafe fn test_mm512_shuffle_f32x4() {
52089        let a = _mm512_setr_ps(
52090            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52091        );
52092        let b = _mm512_setr_ps(
52093            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52094        );
52095        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
52096        let e = _mm512_setr_ps(
52097            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
52098        );
52099        assert_eq_m512(r, e);
52100    }
52101
52102    #[simd_test(enable = "avx512f")]
52103    unsafe fn test_mm512_mask_shuffle_f32x4() {
52104        let a = _mm512_setr_ps(
52105            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52106        );
52107        let b = _mm512_setr_ps(
52108            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52109        );
52110        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
52111        assert_eq_m512(r, a);
52112        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
52113        let e = _mm512_setr_ps(
52114            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
52115        );
52116        assert_eq_m512(r, e);
52117    }
52118
52119    #[simd_test(enable = "avx512f")]
52120    unsafe fn test_mm512_maskz_shuffle_f32x4() {
52121        let a = _mm512_setr_ps(
52122            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
52123        );
52124        let b = _mm512_setr_ps(
52125            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
52126        );
52127        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
52128        assert_eq_m512(r, _mm512_setzero_ps());
52129        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
52130        let e = _mm512_setr_ps(
52131            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
52132        );
52133        assert_eq_m512(r, e);
52134    }
52135
52136    #[simd_test(enable = "avx512f,avx512vl")]
52137    unsafe fn test_mm256_shuffle_f32x4() {
52138        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52139        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52140        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
52141        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
52142        assert_eq_m256(r, e);
52143    }
52144
52145    #[simd_test(enable = "avx512f,avx512vl")]
52146    unsafe fn test_mm256_mask_shuffle_f32x4() {
52147        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52148        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52149        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
52150        assert_eq_m256(r, a);
52151        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
52152        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
52153        assert_eq_m256(r, e);
52154    }
52155
52156    #[simd_test(enable = "avx512f,avx512vl")]
52157    unsafe fn test_mm256_maskz_shuffle_f32x4() {
52158        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
52159        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
52160        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
52161        assert_eq_m256(r, _mm256_setzero_ps());
52162        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
52163        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
52164        assert_eq_m256(r, e);
52165    }
52166
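    // `extractf32x4`/`extracti32x4` with index 1 pull out the second 128-bit lane (elements
    // 4..7); the masked variants then blend that lane with `src` or zero.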
52167    #[simd_test(enable = "avx512f")]
52168    unsafe fn test_mm512_extractf32x4_ps() {
52169        let a = _mm512_setr_ps(
52170            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52171        );
52172        let r = _mm512_extractf32x4_ps::<1>(a);
52173        let e = _mm_setr_ps(5., 6., 7., 8.);
52174        assert_eq_m128(r, e);
52175    }
52176
52177    #[simd_test(enable = "avx512f")]
52178    unsafe fn test_mm512_mask_extractf32x4_ps() {
52179        let a = _mm512_setr_ps(
52180            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52181        );
52182        let src = _mm_set1_ps(100.);
52183        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
52184        assert_eq_m128(r, src);
52185        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
52186        let e = _mm_setr_ps(5., 6., 7., 8.);
52187        assert_eq_m128(r, e);
52188    }
52189
52190    #[simd_test(enable = "avx512f")]
52191    unsafe fn test_mm512_maskz_extractf32x4_ps() {
52192        let a = _mm512_setr_ps(
52193            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52194        );
52195        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
52196        assert_eq_m128(r, _mm_setzero_ps());
52197        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
52198        let e = _mm_setr_ps(5., 0., 0., 0.);
52199        assert_eq_m128(r, e);
52200    }
52201
52202    #[simd_test(enable = "avx512f,avx512vl")]
52203    unsafe fn test_mm256_extractf32x4_ps() {
52204        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52205        let r = _mm256_extractf32x4_ps::<1>(a);
52206        let e = _mm_set_ps(1., 2., 3., 4.);
52207        assert_eq_m128(r, e);
52208    }
52209
52210    #[simd_test(enable = "avx512f,avx512vl")]
52211    unsafe fn test_mm256_mask_extractf32x4_ps() {
52212        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52213        let src = _mm_set1_ps(100.);
52214        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
52215        assert_eq_m128(r, src);
52216        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
52217        let e = _mm_set_ps(1., 2., 3., 4.);
52218        assert_eq_m128(r, e);
52219    }
52220
52221    #[simd_test(enable = "avx512f,avx512vl")]
52222    unsafe fn test_mm256_maskz_extractf32x4_ps() {
52223        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52224        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
52225        assert_eq_m128(r, _mm_setzero_ps());
52226        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
52227        let e = _mm_set_ps(1., 2., 3., 4.);
52228        assert_eq_m128(r, e);
52229    }
52230
52231    #[simd_test(enable = "avx512f")]
52232    unsafe fn test_mm512_extracti32x4_epi32() {
52233        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52234        let r = _mm512_extracti32x4_epi32::<1>(a);
52235        let e = _mm_setr_epi32(5, 6, 7, 8);
52236        assert_eq_m128i(r, e);
52237    }
52238
52239    #[simd_test(enable = "avx512f")]
52240    unsafe fn test_mm512_mask_extracti32x4_epi32() {
52241        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52242        let src = _mm_set1_epi32(100);
52243        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
52244        assert_eq_m128i(r, src);
52245        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
52246        let e = _mm_setr_epi32(5, 6, 7, 8);
52247        assert_eq_m128i(r, e);
52248    }
52249
52250    #[simd_test(enable = "avx512f,avx512vl")]
52251    unsafe fn test_mm512_maskz_extracti32x4_epi32() {
52252        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52253        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
52254        assert_eq_m128i(r, _mm_setzero_si128());
52255        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
52256        let e = _mm_setr_epi32(5, 0, 0, 0);
52257        assert_eq_m128i(r, e);
52258    }
52259
52260    #[simd_test(enable = "avx512f,avx512vl")]
52261    unsafe fn test_mm256_extracti32x4_epi32() {
52262        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52263        let r = _mm256_extracti32x4_epi32::<1>(a);
52264        let e = _mm_set_epi32(1, 2, 3, 4);
52265        assert_eq_m128i(r, e);
52266    }
52267
52268    #[simd_test(enable = "avx512f,avx512vl")]
52269    unsafe fn test_mm256_mask_extracti32x4_epi32() {
52270        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52271        let src = _mm_set1_epi32(100);
52272        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
52273        assert_eq_m128i(r, src);
52274        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
52275        let e = _mm_set_epi32(1, 2, 3, 4);
52276        assert_eq_m128i(r, e);
52277    }
52278
52279    #[simd_test(enable = "avx512f,avx512vl")]
52280    unsafe fn test_mm256_maskz_extracti32x4_epi32() {
52281        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52282        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
52283        assert_eq_m128i(r, _mm_setzero_si128());
52284        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
52285        let e = _mm_set_epi32(1, 2, 3, 4);
52286        assert_eq_m128i(r, e);
52287    }
52288
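    // moveldup duplicates the even-indexed f32 elements into each adjacent pair and
    // movehdup duplicates the odd-indexed ones. _mm512_setr_ps lists elements from
    // index 0 upwards while _mm256_set_ps/_mm_set_ps list them from the highest index
    // down, which is why the 512-bit and 256/128-bit expected vectors below look
    // mirrored relative to each other.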
52289    #[simd_test(enable = "avx512f")]
52290    unsafe fn test_mm512_moveldup_ps() {
52291        let a = _mm512_setr_ps(
52292            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52293        );
52294        let r = _mm512_moveldup_ps(a);
52295        let e = _mm512_setr_ps(
52296            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
52297        );
52298        assert_eq_m512(r, e);
52299    }
52300
52301    #[simd_test(enable = "avx512f")]
52302    unsafe fn test_mm512_mask_moveldup_ps() {
52303        let a = _mm512_setr_ps(
52304            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52305        );
52306        let r = _mm512_mask_moveldup_ps(a, 0, a);
52307        assert_eq_m512(r, a);
52308        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
52309        let e = _mm512_setr_ps(
52310            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
52311        );
52312        assert_eq_m512(r, e);
52313    }
52314
52315    #[simd_test(enable = "avx512f")]
52316    unsafe fn test_mm512_maskz_moveldup_ps() {
52317        let a = _mm512_setr_ps(
52318            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52319        );
52320        let r = _mm512_maskz_moveldup_ps(0, a);
52321        assert_eq_m512(r, _mm512_setzero_ps());
52322        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
52323        let e = _mm512_setr_ps(
52324            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
52325        );
52326        assert_eq_m512(r, e);
52327    }
52328
52329    #[simd_test(enable = "avx512f,avx512vl")]
52330    unsafe fn test_mm256_mask_moveldup_ps() {
52331        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52332        let r = _mm256_mask_moveldup_ps(a, 0, a);
52333        assert_eq_m256(r, a);
52334        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
52335        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
52336        assert_eq_m256(r, e);
52337    }
52338
52339    #[simd_test(enable = "avx512f,avx512vl")]
52340    unsafe fn test_mm256_maskz_moveldup_ps() {
52341        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52342        let r = _mm256_maskz_moveldup_ps(0, a);
52343        assert_eq_m256(r, _mm256_setzero_ps());
52344        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
52345        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
52346        assert_eq_m256(r, e);
52347    }
52348
52349    #[simd_test(enable = "avx512f,avx512vl")]
52350    unsafe fn test_mm_mask_moveldup_ps() {
52351        let a = _mm_set_ps(1., 2., 3., 4.);
52352        let r = _mm_mask_moveldup_ps(a, 0, a);
52353        assert_eq_m128(r, a);
52354        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
52355        let e = _mm_set_ps(2., 2., 4., 4.);
52356        assert_eq_m128(r, e);
52357    }
52358
52359    #[simd_test(enable = "avx512f,avx512vl")]
52360    unsafe fn test_mm_maskz_moveldup_ps() {
52361        let a = _mm_set_ps(1., 2., 3., 4.);
52362        let r = _mm_maskz_moveldup_ps(0, a);
52363        assert_eq_m128(r, _mm_setzero_ps());
52364        let r = _mm_maskz_moveldup_ps(0b00001111, a);
52365        let e = _mm_set_ps(2., 2., 4., 4.);
52366        assert_eq_m128(r, e);
52367    }
52368
52369    #[simd_test(enable = "avx512f")]
52370    unsafe fn test_mm512_movehdup_ps() {
52371        let a = _mm512_setr_ps(
52372            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52373        );
52374        let r = _mm512_movehdup_ps(a);
52375        let e = _mm512_setr_ps(
52376            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
52377        );
52378        assert_eq_m512(r, e);
52379    }
52380
52381    #[simd_test(enable = "avx512f")]
52382    unsafe fn test_mm512_mask_movehdup_ps() {
52383        let a = _mm512_setr_ps(
52384            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52385        );
52386        let r = _mm512_mask_movehdup_ps(a, 0, a);
52387        assert_eq_m512(r, a);
52388        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
52389        let e = _mm512_setr_ps(
52390            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
52391        );
52392        assert_eq_m512(r, e);
52393    }
52394
52395    #[simd_test(enable = "avx512f")]
52396    unsafe fn test_mm512_maskz_movehdup_ps() {
52397        let a = _mm512_setr_ps(
52398            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52399        );
52400        let r = _mm512_maskz_movehdup_ps(0, a);
52401        assert_eq_m512(r, _mm512_setzero_ps());
52402        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
52403        let e = _mm512_setr_ps(
52404            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
52405        );
52406        assert_eq_m512(r, e);
52407    }
52408
52409    #[simd_test(enable = "avx512f,avx512vl")]
52410    unsafe fn test_mm256_mask_movehdup_ps() {
52411        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52412        let r = _mm256_mask_movehdup_ps(a, 0, a);
52413        assert_eq_m256(r, a);
52414        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
52415        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
52416        assert_eq_m256(r, e);
52417    }
52418
52419    #[simd_test(enable = "avx512f,avx512vl")]
52420    unsafe fn test_mm256_maskz_movehdup_ps() {
52421        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52422        let r = _mm256_maskz_movehdup_ps(0, a);
52423        assert_eq_m256(r, _mm256_setzero_ps());
52424        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
52425        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
52426        assert_eq_m256(r, e);
52427    }
52428
52429    #[simd_test(enable = "avx512f,avx512vl")]
52430    unsafe fn test_mm_mask_movehdup_ps() {
52431        let a = _mm_set_ps(1., 2., 3., 4.);
52432        let r = _mm_mask_movehdup_ps(a, 0, a);
52433        assert_eq_m128(r, a);
52434        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
52435        let e = _mm_set_ps(1., 1., 3., 3.);
52436        assert_eq_m128(r, e);
52437    }
52438
52439    #[simd_test(enable = "avx512f,avx512vl")]
52440    unsafe fn test_mm_maskz_movehdup_ps() {
52441        let a = _mm_set_ps(1., 2., 3., 4.);
52442        let r = _mm_maskz_movehdup_ps(0, a);
52443        assert_eq_m128(r, _mm_setzero_ps());
52444        let r = _mm_maskz_movehdup_ps(0b00001111, a);
52445        let e = _mm_set_ps(1., 1., 3., 3.);
52446        assert_eq_m128(r, e);
52447    }
52448
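    // The insert tests overwrite one 128-bit lane of the first vector with the
    // 128-bit operand; the const generic picks the lane, and the writemask/zeromask
    // then applies per 32-bit element of the full-width result.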
52449    #[simd_test(enable = "avx512f")]
52450    unsafe fn test_mm512_inserti32x4() {
52451        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52452        let b = _mm_setr_epi32(17, 18, 19, 20);
52453        let r = _mm512_inserti32x4::<0>(a, b);
52454        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52455        assert_eq_m512i(r, e);
52456    }
52457
52458    #[simd_test(enable = "avx512f")]
52459    unsafe fn test_mm512_mask_inserti32x4() {
52460        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52461        let b = _mm_setr_epi32(17, 18, 19, 20);
52462        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
52463        assert_eq_m512i(r, a);
52464        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
52465        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52466        assert_eq_m512i(r, e);
52467    }
52468
52469    #[simd_test(enable = "avx512f")]
52470    unsafe fn test_mm512_maskz_inserti32x4() {
52471        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52472        let b = _mm_setr_epi32(17, 18, 19, 20);
52473        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
52474        assert_eq_m512i(r, _mm512_setzero_si512());
52475        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
52476        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
52477        assert_eq_m512i(r, e);
52478    }
52479
52480    #[simd_test(enable = "avx512f,avx512vl")]
52481    unsafe fn test_mm256_inserti32x4() {
52482        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52483        let b = _mm_set_epi32(17, 18, 19, 20);
52484        let r = _mm256_inserti32x4::<1>(a, b);
52485        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
52486        assert_eq_m256i(r, e);
52487    }
52488
52489    #[simd_test(enable = "avx512f,avx512vl")]
52490    unsafe fn test_mm256_mask_inserti32x4() {
52491        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52492        let b = _mm_set_epi32(17, 18, 19, 20);
52493        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
52494        assert_eq_m256i(r, a);
52495        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
52496        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
52497        assert_eq_m256i(r, e);
52498    }
52499
52500    #[simd_test(enable = "avx512f,avx512vl")]
52501    unsafe fn test_mm256_maskz_inserti32x4() {
52502        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
52503        let b = _mm_set_epi32(17, 18, 19, 20);
52504        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
52505        assert_eq_m256i(r, _mm256_setzero_si256());
52506        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
52507        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
52508        assert_eq_m256i(r, e);
52509    }
52510
52511    #[simd_test(enable = "avx512f")]
52512    unsafe fn test_mm512_insertf32x4() {
52513        let a = _mm512_setr_ps(
52514            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52515        );
52516        let b = _mm_setr_ps(17., 18., 19., 20.);
52517        let r = _mm512_insertf32x4::<0>(a, b);
52518        let e = _mm512_setr_ps(
52519            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52520        );
52521        assert_eq_m512(r, e);
52522    }
52523
52524    #[simd_test(enable = "avx512f")]
52525    unsafe fn test_mm512_mask_insertf32x4() {
52526        let a = _mm512_setr_ps(
52527            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52528        );
52529        let b = _mm_setr_ps(17., 18., 19., 20.);
52530        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
52531        assert_eq_m512(r, a);
52532        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
52533        let e = _mm512_setr_ps(
52534            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52535        );
52536        assert_eq_m512(r, e);
52537    }
52538
52539    #[simd_test(enable = "avx512f")]
52540    unsafe fn test_mm512_maskz_insertf32x4() {
52541        let a = _mm512_setr_ps(
52542            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
52543        );
52544        let b = _mm_setr_ps(17., 18., 19., 20.);
52545        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
52546        assert_eq_m512(r, _mm512_setzero_ps());
52547        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
52548        let e = _mm512_setr_ps(
52549            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
52550        );
52551        assert_eq_m512(r, e);
52552    }
52553
52554    #[simd_test(enable = "avx512f,avx512vl")]
52555    unsafe fn test_mm256_insertf32x4() {
52556        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52557        let b = _mm_set_ps(17., 18., 19., 20.);
52558        let r = _mm256_insertf32x4::<1>(a, b);
52559        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
52560        assert_eq_m256(r, e);
52561    }
52562
52563    #[simd_test(enable = "avx512f,avx512vl")]
52564    unsafe fn test_mm256_mask_insertf32x4() {
52565        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52566        let b = _mm_set_ps(17., 18., 19., 20.);
52567        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
52568        assert_eq_m256(r, a);
52569        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
52570        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
52571        assert_eq_m256(r, e);
52572    }
52573
52574    #[simd_test(enable = "avx512f,avx512vl")]
52575    unsafe fn test_mm256_maskz_insertf32x4() {
52576        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
52577        let b = _mm_set_ps(17., 18., 19., 20.);
52578        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
52579        assert_eq_m256(r, _mm256_setzero_ps());
52580        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
52581        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
52582        assert_eq_m256(r, e);
52583    }
52584
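    // castps128/256_ps512 merely reinterpret the register and leave the upper
    // elements undefined, so those tests only compare the lower lane back. The zext
    // variants additionally guarantee zeroed upper elements, and castps_pd /
    // castps_si512 are pure bit reinterpretations (1.0f32 has the bit pattern
    // 0x3f80_0000 == 1065353216).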
52585    #[simd_test(enable = "avx512f")]
52586    unsafe fn test_mm512_castps128_ps512() {
52587        let a = _mm_setr_ps(17., 18., 19., 20.);
52588        let r = _mm512_castps128_ps512(a);
52589        assert_eq_m128(_mm512_castps512_ps128(r), a);
52590    }
52591
52592    #[simd_test(enable = "avx512f")]
52593    unsafe fn test_mm512_castps256_ps512() {
52594        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
52595        let r = _mm512_castps256_ps512(a);
52596        assert_eq_m256(_mm512_castps512_ps256(r), a);
52597    }
52598
52599    #[simd_test(enable = "avx512f")]
52600    unsafe fn test_mm512_zextps128_ps512() {
52601        let a = _mm_setr_ps(17., 18., 19., 20.);
52602        let r = _mm512_zextps128_ps512(a);
52603        let e = _mm512_setr_ps(
52604            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
52605        );
52606        assert_eq_m512(r, e);
52607    }
52608
52609    #[simd_test(enable = "avx512f")]
52610    unsafe fn test_mm512_zextps256_ps512() {
52611        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
52612        let r = _mm512_zextps256_ps512(a);
52613        let e = _mm512_setr_ps(
52614            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
52615        );
52616        assert_eq_m512(r, e);
52617    }
52618
52619    #[simd_test(enable = "avx512f")]
52620    unsafe fn test_mm512_castps512_ps128() {
52621        let a = _mm512_setr_ps(
52622            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
52623        );
52624        let r = _mm512_castps512_ps128(a);
52625        let e = _mm_setr_ps(17., 18., 19., 20.);
52626        assert_eq_m128(r, e);
52627    }
52628
52629    #[simd_test(enable = "avx512f")]
52630    unsafe fn test_mm512_castps512_ps256() {
52631        let a = _mm512_setr_ps(
52632            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
52633        );
52634        let r = _mm512_castps512_ps256(a);
52635        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
52636        assert_eq_m256(r, e);
52637    }
52638
52639    #[simd_test(enable = "avx512f")]
52640    unsafe fn test_mm512_castps_pd() {
52641        let a = _mm512_set1_ps(1.);
52642        let r = _mm512_castps_pd(a);
52643        let e = _mm512_set1_pd(0.007812501848093234);
52644        assert_eq_m512d(r, e);
52645    }
52646
52647    #[simd_test(enable = "avx512f")]
52648    unsafe fn test_mm512_castps_si512() {
52649        let a = _mm512_set1_ps(1.);
52650        let r = _mm512_castps_si512(a);
52651        let e = _mm512_set1_epi32(1065353216);
52652        assert_eq_m512i(r, e);
52653    }
52654
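    // broadcastd/broadcastss replicate element 0 of the 128-bit source (the last
    // argument of _mm_set_epi32/_mm_set_ps, i.e. 20 here) into every lane, while
    // broadcast_i32x4/f32x4 replicate the whole 128-bit block.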
52655    #[simd_test(enable = "avx512f")]
52656    unsafe fn test_mm512_broadcastd_epi32() {
52657        let a = _mm_set_epi32(17, 18, 19, 20);
52658        let r = _mm512_broadcastd_epi32(a);
52659        let e = _mm512_set1_epi32(20);
52660        assert_eq_m512i(r, e);
52661    }
52662
52663    #[simd_test(enable = "avx512f")]
52664    unsafe fn test_mm512_mask_broadcastd_epi32() {
52665        let src = _mm512_set1_epi32(20);
52666        let a = _mm_set_epi32(17, 18, 19, 20);
52667        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
52668        assert_eq_m512i(r, src);
52669        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
52670        let e = _mm512_set1_epi32(20);
52671        assert_eq_m512i(r, e);
52672    }
52673
52674    #[simd_test(enable = "avx512f")]
52675    unsafe fn test_mm512_maskz_broadcastd_epi32() {
52676        let a = _mm_set_epi32(17, 18, 19, 20);
52677        let r = _mm512_maskz_broadcastd_epi32(0, a);
52678        assert_eq_m512i(r, _mm512_setzero_si512());
52679        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
52680        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
52681        assert_eq_m512i(r, e);
52682    }
52683
52684    #[simd_test(enable = "avx512f,avx512vl")]
52685    unsafe fn test_mm256_mask_broadcastd_epi32() {
52686        let src = _mm256_set1_epi32(20);
52687        let a = _mm_set_epi32(17, 18, 19, 20);
52688        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
52689        assert_eq_m256i(r, src);
52690        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
52691        let e = _mm256_set1_epi32(20);
52692        assert_eq_m256i(r, e);
52693    }
52694
52695    #[simd_test(enable = "avx512f,avx512vl")]
52696    unsafe fn test_mm256_maskz_broadcastd_epi32() {
52697        let a = _mm_set_epi32(17, 18, 19, 20);
52698        let r = _mm256_maskz_broadcastd_epi32(0, a);
52699        assert_eq_m256i(r, _mm256_setzero_si256());
52700        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
52701        let e = _mm256_set1_epi32(20);
52702        assert_eq_m256i(r, e);
52703    }
52704
52705    #[simd_test(enable = "avx512f,avx512vl")]
52706    unsafe fn test_mm_mask_broadcastd_epi32() {
52707        let src = _mm_set1_epi32(20);
52708        let a = _mm_set_epi32(17, 18, 19, 20);
52709        let r = _mm_mask_broadcastd_epi32(src, 0, a);
52710        assert_eq_m128i(r, src);
52711        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
52712        let e = _mm_set1_epi32(20);
52713        assert_eq_m128i(r, e);
52714    }
52715
52716    #[simd_test(enable = "avx512f,avx512vl")]
52717    unsafe fn test_mm_maskz_broadcastd_epi32() {
52718        let a = _mm_set_epi32(17, 18, 19, 20);
52719        let r = _mm_maskz_broadcastd_epi32(0, a);
52720        assert_eq_m128i(r, _mm_setzero_si128());
52721        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
52722        let e = _mm_set1_epi32(20);
52723        assert_eq_m128i(r, e);
52724    }
52725
52726    #[simd_test(enable = "avx512f")]
52727    unsafe fn test_mm512_broadcastss_ps() {
52728        let a = _mm_set_ps(17., 18., 19., 20.);
52729        let r = _mm512_broadcastss_ps(a);
52730        let e = _mm512_set1_ps(20.);
52731        assert_eq_m512(r, e);
52732    }
52733
52734    #[simd_test(enable = "avx512f")]
52735    unsafe fn test_mm512_mask_broadcastss_ps() {
52736        let src = _mm512_set1_ps(20.);
52737        let a = _mm_set_ps(17., 18., 19., 20.);
52738        let r = _mm512_mask_broadcastss_ps(src, 0, a);
52739        assert_eq_m512(r, src);
52740        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
52741        let e = _mm512_set1_ps(20.);
52742        assert_eq_m512(r, e);
52743    }
52744
52745    #[simd_test(enable = "avx512f")]
52746    unsafe fn test_mm512_maskz_broadcastss_ps() {
52747        let a = _mm_set_ps(17., 18., 19., 20.);
52748        let r = _mm512_maskz_broadcastss_ps(0, a);
52749        assert_eq_m512(r, _mm512_setzero_ps());
52750        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
52751        let e = _mm512_setr_ps(
52752            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
52753        );
52754        assert_eq_m512(r, e);
52755    }
52756
52757    #[simd_test(enable = "avx512f,avx512vl")]
52758    unsafe fn test_mm256_mask_broadcastss_ps() {
52759        let src = _mm256_set1_ps(20.);
52760        let a = _mm_set_ps(17., 18., 19., 20.);
52761        let r = _mm256_mask_broadcastss_ps(src, 0, a);
52762        assert_eq_m256(r, src);
52763        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
52764        let e = _mm256_set1_ps(20.);
52765        assert_eq_m256(r, e);
52766    }
52767
52768    #[simd_test(enable = "avx512f,avx512vl")]
52769    unsafe fn test_mm256_maskz_broadcastss_ps() {
52770        let a = _mm_set_ps(17., 18., 19., 20.);
52771        let r = _mm256_maskz_broadcastss_ps(0, a);
52772        assert_eq_m256(r, _mm256_setzero_ps());
52773        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
52774        let e = _mm256_set1_ps(20.);
52775        assert_eq_m256(r, e);
52776    }
52777
52778    #[simd_test(enable = "avx512f,avx512vl")]
52779    unsafe fn test_mm_mask_broadcastss_ps() {
52780        let src = _mm_set1_ps(20.);
52781        let a = _mm_set_ps(17., 18., 19., 20.);
52782        let r = _mm_mask_broadcastss_ps(src, 0, a);
52783        assert_eq_m128(r, src);
52784        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
52785        let e = _mm_set1_ps(20.);
52786        assert_eq_m128(r, e);
52787    }
52788
52789    #[simd_test(enable = "avx512f,avx512vl")]
52790    unsafe fn test_mm_maskz_broadcastss_ps() {
52791        let a = _mm_set_ps(17., 18., 19., 20.);
52792        let r = _mm_maskz_broadcastss_ps(0, a);
52793        assert_eq_m128(r, _mm_setzero_ps());
52794        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
52795        let e = _mm_set1_ps(20.);
52796        assert_eq_m128(r, e);
52797    }
52798
52799    #[simd_test(enable = "avx512f")]
52800    unsafe fn test_mm512_broadcast_i32x4() {
52801        let a = _mm_set_epi32(17, 18, 19, 20);
52802        let r = _mm512_broadcast_i32x4(a);
52803        let e = _mm512_set_epi32(
52804            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
52805        );
52806        assert_eq_m512i(r, e);
52807    }
52808
52809    #[simd_test(enable = "avx512f")]
52810    unsafe fn test_mm512_mask_broadcast_i32x4() {
52811        let src = _mm512_set1_epi32(20);
52812        let a = _mm_set_epi32(17, 18, 19, 20);
52813        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
52814        assert_eq_m512i(r, src);
52815        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
52816        let e = _mm512_set_epi32(
52817            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
52818        );
52819        assert_eq_m512i(r, e);
52820    }
52821
52822    #[simd_test(enable = "avx512f")]
52823    unsafe fn test_mm512_maskz_broadcast_i32x4() {
52824        let a = _mm_set_epi32(17, 18, 19, 20);
52825        let r = _mm512_maskz_broadcast_i32x4(0, a);
52826        assert_eq_m512i(r, _mm512_setzero_si512());
52827        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
52828        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
52829        assert_eq_m512i(r, e);
52830    }
52831
52832    #[simd_test(enable = "avx512f,avx512vl")]
52833    unsafe fn test_mm256_broadcast_i32x4() {
52834        let a = _mm_set_epi32(17, 18, 19, 20);
52835        let r = _mm256_broadcast_i32x4(a);
52836        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
52837        assert_eq_m256i(r, e);
52838    }
52839
52840    #[simd_test(enable = "avx512f,avx512vl")]
52841    unsafe fn test_mm256_mask_broadcast_i32x4() {
52842        let src = _mm256_set1_epi32(20);
52843        let a = _mm_set_epi32(17, 18, 19, 20);
52844        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
52845        assert_eq_m256i(r, src);
52846        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
52847        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
52848        assert_eq_m256i(r, e);
52849    }
52850
52851    #[simd_test(enable = "avx512f,avx512vl")]
52852    unsafe fn test_mm256_maskz_broadcast_i32x4() {
52853        let a = _mm_set_epi32(17, 18, 19, 20);
52854        let r = _mm256_maskz_broadcast_i32x4(0, a);
52855        assert_eq_m256i(r, _mm256_setzero_si256());
52856        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
52857        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
52858        assert_eq_m256i(r, e);
52859    }
52860
52861    #[simd_test(enable = "avx512f")]
52862    unsafe fn test_mm512_broadcast_f32x4() {
52863        let a = _mm_set_ps(17., 18., 19., 20.);
52864        let r = _mm512_broadcast_f32x4(a);
52865        let e = _mm512_set_ps(
52866            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
52867        );
52868        assert_eq_m512(r, e);
52869    }
52870
52871    #[simd_test(enable = "avx512f")]
52872    unsafe fn test_mm512_mask_broadcast_f32x4() {
52873        let src = _mm512_set1_ps(20.);
52874        let a = _mm_set_ps(17., 18., 19., 20.);
52875        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
52876        assert_eq_m512(r, src);
52877        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
52878        let e = _mm512_set_ps(
52879            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
52880        );
52881        assert_eq_m512(r, e);
52882    }
52883
52884    #[simd_test(enable = "avx512f")]
52885    unsafe fn test_mm512_maskz_broadcast_f32x4() {
52886        let a = _mm_set_ps(17., 18., 19., 20.);
52887        let r = _mm512_maskz_broadcast_f32x4(0, a);
52888        assert_eq_m512(r, _mm512_setzero_ps());
52889        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
52890        let e = _mm512_set_ps(
52891            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
52892        );
52893        assert_eq_m512(r, e);
52894    }
52895
52896    #[simd_test(enable = "avx512f,avx512vl")]
52897    unsafe fn test_mm256_broadcast_f32x4() {
52898        let a = _mm_set_ps(17., 18., 19., 20.);
52899        let r = _mm256_broadcast_f32x4(a);
52900        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
52901        assert_eq_m256(r, e);
52902    }
52903
52904    #[simd_test(enable = "avx512f,avx512vl")]
52905    unsafe fn test_mm256_mask_broadcast_f32x4() {
52906        let src = _mm256_set1_ps(20.);
52907        let a = _mm_set_ps(17., 18., 19., 20.);
52908        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
52909        assert_eq_m256(r, src);
52910        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
52911        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
52912        assert_eq_m256(r, e);
52913    }
52914
52915    #[simd_test(enable = "avx512f,avx512vl")]
52916    unsafe fn test_mm256_maskz_broadcast_f32x4() {
52917        let a = _mm_set_ps(17., 18., 19., 20.);
52918        let r = _mm256_maskz_broadcast_f32x4(0, a);
52919        assert_eq_m256(r, _mm256_setzero_ps());
52920        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
52921        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
52922        assert_eq_m256(r, e);
52923    }
52924
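    // mask_blend takes an element from `b` where the corresponding mask bit is set
    // and from `a` where it is clear, so 0b11111111_00000000 yields the upper eight
    // elements from `b` and the lower eight from `a`.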
52925    #[simd_test(enable = "avx512f")]
52926    unsafe fn test_mm512_mask_blend_epi32() {
52927        let a = _mm512_set1_epi32(1);
52928        let b = _mm512_set1_epi32(2);
52929        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
52930        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
52931        assert_eq_m512i(r, e);
52932    }
52933
52934    #[simd_test(enable = "avx512f,avx512vl")]
52935    unsafe fn test_mm256_mask_blend_epi32() {
52936        let a = _mm256_set1_epi32(1);
52937        let b = _mm256_set1_epi32(2);
52938        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
52939        let e = _mm256_set1_epi32(2);
52940        assert_eq_m256i(r, e);
52941    }
52942
52943    #[simd_test(enable = "avx512f,avx512vl")]
52944    unsafe fn test_mm_mask_blend_epi32() {
52945        let a = _mm_set1_epi32(1);
52946        let b = _mm_set1_epi32(2);
52947        let r = _mm_mask_blend_epi32(0b00001111, a, b);
52948        let e = _mm_set1_epi32(2);
52949        assert_eq_m128i(r, e);
52950    }
52951
52952    #[simd_test(enable = "avx512f")]
52953    unsafe fn test_mm512_mask_blend_ps() {
52954        let a = _mm512_set1_ps(1.);
52955        let b = _mm512_set1_ps(2.);
52956        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
52957        let e = _mm512_set_ps(
52958            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
52959        );
52960        assert_eq_m512(r, e);
52961    }
52962
52963    #[simd_test(enable = "avx512f,avx512vl")]
52964    unsafe fn test_mm256_mask_blend_ps() {
52965        let a = _mm256_set1_ps(1.);
52966        let b = _mm256_set1_ps(2.);
52967        let r = _mm256_mask_blend_ps(0b11111111, a, b);
52968        let e = _mm256_set1_ps(2.);
52969        assert_eq_m256(r, e);
52970    }
52971
52972    #[simd_test(enable = "avx512f,avx512vl")]
52973    unsafe fn test_mm_mask_blend_ps() {
52974        let a = _mm_set1_ps(1.);
52975        let b = _mm_set1_ps(2.);
52976        let r = _mm_mask_blend_ps(0b00001111, a, b);
52977        let e = _mm_set1_ps(2.);
52978        assert_eq_m128(r, e);
52979    }
52980
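    // unpackhi/unpacklo interleave the high or low halves of each 128-bit lane
    // independently, so the expected vectors interleave `a` and `b` within every
    // group of four 32-bit elements rather than across the whole register.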
52981    #[simd_test(enable = "avx512f")]
52982    unsafe fn test_mm512_unpackhi_epi32() {
52983        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52984        let b = _mm512_set_epi32(
52985            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
52986        );
52987        let r = _mm512_unpackhi_epi32(a, b);
52988        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
52989        assert_eq_m512i(r, e);
52990    }
52991
52992    #[simd_test(enable = "avx512f")]
52993    unsafe fn test_mm512_mask_unpackhi_epi32() {
52994        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52995        let b = _mm512_set_epi32(
52996            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
52997        );
52998        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
52999        assert_eq_m512i(r, a);
53000        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
53001        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
53002        assert_eq_m512i(r, e);
53003    }
53004
53005    #[simd_test(enable = "avx512f")]
53006    unsafe fn test_mm512_maskz_unpackhi_epi32() {
53007        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53008        let b = _mm512_set_epi32(
53009            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53010        );
53011        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
53012        assert_eq_m512i(r, _mm512_setzero_si512());
53013        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
53014        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
53015        assert_eq_m512i(r, e);
53016    }
53017
53018    #[simd_test(enable = "avx512f,avx512vl")]
53019    unsafe fn test_mm256_mask_unpackhi_epi32() {
53020        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53021        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53022        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
53023        assert_eq_m256i(r, a);
53024        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
53025        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
53026        assert_eq_m256i(r, e);
53027    }
53028
53029    #[simd_test(enable = "avx512f,avx512vl")]
53030    unsafe fn test_mm256_maskz_unpackhi_epi32() {
53031        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53032        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53033        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
53034        assert_eq_m256i(r, _mm256_setzero_si256());
53035        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
53036        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
53037        assert_eq_m256i(r, e);
53038    }
53039
53040    #[simd_test(enable = "avx512f,avx512vl")]
53041    unsafe fn test_mm_mask_unpackhi_epi32() {
53042        let a = _mm_set_epi32(1, 2, 3, 4);
53043        let b = _mm_set_epi32(17, 18, 19, 20);
53044        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
53045        assert_eq_m128i(r, a);
53046        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
53047        let e = _mm_set_epi32(17, 1, 18, 2);
53048        assert_eq_m128i(r, e);
53049    }
53050
53051    #[simd_test(enable = "avx512f,avx512vl")]
53052    unsafe fn test_mm_maskz_unpackhi_epi32() {
53053        let a = _mm_set_epi32(1, 2, 3, 4);
53054        let b = _mm_set_epi32(17, 18, 19, 20);
53055        let r = _mm_maskz_unpackhi_epi32(0, a, b);
53056        assert_eq_m128i(r, _mm_setzero_si128());
53057        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
53058        let e = _mm_set_epi32(17, 1, 18, 2);
53059        assert_eq_m128i(r, e);
53060    }
53061
53062    #[simd_test(enable = "avx512f")]
53063    unsafe fn test_mm512_unpackhi_ps() {
53064        let a = _mm512_set_ps(
53065            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53066        );
53067        let b = _mm512_set_ps(
53068            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53069        );
53070        let r = _mm512_unpackhi_ps(a, b);
53071        let e = _mm512_set_ps(
53072            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
53073        );
53074        assert_eq_m512(r, e);
53075    }
53076
53077    #[simd_test(enable = "avx512f")]
53078    unsafe fn test_mm512_mask_unpackhi_ps() {
53079        let a = _mm512_set_ps(
53080            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53081        );
53082        let b = _mm512_set_ps(
53083            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53084        );
53085        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
53086        assert_eq_m512(r, a);
53087        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
53088        let e = _mm512_set_ps(
53089            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
53090        );
53091        assert_eq_m512(r, e);
53092    }
53093
53094    #[simd_test(enable = "avx512f")]
53095    unsafe fn test_mm512_maskz_unpackhi_ps() {
53096        let a = _mm512_set_ps(
53097            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53098        );
53099        let b = _mm512_set_ps(
53100            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53101        );
53102        let r = _mm512_maskz_unpackhi_ps(0, a, b);
53103        assert_eq_m512(r, _mm512_setzero_ps());
53104        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
53105        let e = _mm512_set_ps(
53106            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
53107        );
53108        assert_eq_m512(r, e);
53109    }
53110
53111    #[simd_test(enable = "avx512f,avx512vl")]
53112    unsafe fn test_mm256_mask_unpackhi_ps() {
53113        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53114        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53115        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
53116        assert_eq_m256(r, a);
53117        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
53118        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
53119        assert_eq_m256(r, e);
53120    }
53121
53122    #[simd_test(enable = "avx512f,avx512vl")]
53123    unsafe fn test_mm256_maskz_unpackhi_ps() {
53124        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53125        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53126        let r = _mm256_maskz_unpackhi_ps(0, a, b);
53127        assert_eq_m256(r, _mm256_setzero_ps());
53128        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
53129        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
53130        assert_eq_m256(r, e);
53131    }
53132
53133    #[simd_test(enable = "avx512f,avx512vl")]
53134    unsafe fn test_mm_mask_unpackhi_ps() {
53135        let a = _mm_set_ps(1., 2., 3., 4.);
53136        let b = _mm_set_ps(17., 18., 19., 20.);
53137        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
53138        assert_eq_m128(r, a);
53139        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
53140        let e = _mm_set_ps(17., 1., 18., 2.);
53141        assert_eq_m128(r, e);
53142    }
53143
53144    #[simd_test(enable = "avx512f,avx512vl")]
53145    unsafe fn test_mm_maskz_unpackhi_ps() {
53146        let a = _mm_set_ps(1., 2., 3., 4.);
53147        let b = _mm_set_ps(17., 18., 19., 20.);
53148        let r = _mm_maskz_unpackhi_ps(0, a, b);
53149        assert_eq_m128(r, _mm_setzero_ps());
53150        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
53151        let e = _mm_set_ps(17., 1., 18., 2.);
53152        assert_eq_m128(r, e);
53153    }
53154
53155    #[simd_test(enable = "avx512f")]
53156    unsafe fn test_mm512_unpacklo_epi32() {
53157        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53158        let b = _mm512_set_epi32(
53159            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53160        );
53161        let r = _mm512_unpacklo_epi32(a, b);
53162        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
53163        assert_eq_m512i(r, e);
53164    }
53165
53166    #[simd_test(enable = "avx512f")]
53167    unsafe fn test_mm512_mask_unpacklo_epi32() {
53168        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53169        let b = _mm512_set_epi32(
53170            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53171        );
53172        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
53173        assert_eq_m512i(r, a);
53174        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
53175        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
53176        assert_eq_m512i(r, e);
53177    }
53178
53179    #[simd_test(enable = "avx512f")]
53180    unsafe fn test_mm512_maskz_unpacklo_epi32() {
53181        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
53182        let b = _mm512_set_epi32(
53183            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
53184        );
53185        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
53186        assert_eq_m512i(r, _mm512_setzero_si512());
53187        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
53188        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
53189        assert_eq_m512i(r, e);
53190    }
53191
53192    #[simd_test(enable = "avx512f,avx512vl")]
53193    unsafe fn test_mm256_mask_unpacklo_epi32() {
53194        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53195        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53196        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
53197        assert_eq_m256i(r, a);
53198        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
53199        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
53200        assert_eq_m256i(r, e);
53201    }
53202
53203    #[simd_test(enable = "avx512f,avx512vl")]
53204    unsafe fn test_mm256_maskz_unpacklo_epi32() {
53205        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53206        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
53207        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
53208        assert_eq_m256i(r, _mm256_setzero_si256());
53209        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
53210        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
53211        assert_eq_m256i(r, e);
53212    }
53213
53214    #[simd_test(enable = "avx512f,avx512vl")]
53215    unsafe fn test_mm_mask_unpacklo_epi32() {
53216        let a = _mm_set_epi32(1, 2, 3, 4);
53217        let b = _mm_set_epi32(17, 18, 19, 20);
53218        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
53219        assert_eq_m128i(r, a);
53220        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
53221        let e = _mm_set_epi32(19, 3, 20, 4);
53222        assert_eq_m128i(r, e);
53223    }
53224
53225    #[simd_test(enable = "avx512f,avx512vl")]
53226    unsafe fn test_mm_maskz_unpacklo_epi32() {
53227        let a = _mm_set_epi32(1, 2, 3, 4);
53228        let b = _mm_set_epi32(17, 18, 19, 20);
53229        let r = _mm_maskz_unpacklo_epi32(0, a, b);
53230        assert_eq_m128i(r, _mm_setzero_si128());
53231        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
53232        let e = _mm_set_epi32(19, 3, 20, 4);
53233        assert_eq_m128i(r, e);
53234    }
53235
53236    #[simd_test(enable = "avx512f")]
53237    unsafe fn test_mm512_unpacklo_ps() {
53238        let a = _mm512_set_ps(
53239            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53240        );
53241        let b = _mm512_set_ps(
53242            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53243        );
53244        let r = _mm512_unpacklo_ps(a, b);
53245        let e = _mm512_set_ps(
53246            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
53247        );
53248        assert_eq_m512(r, e);
53249    }
53250
53251    #[simd_test(enable = "avx512f")]
53252    unsafe fn test_mm512_mask_unpacklo_ps() {
53253        let a = _mm512_set_ps(
53254            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53255        );
53256        let b = _mm512_set_ps(
53257            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53258        );
53259        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
53260        assert_eq_m512(r, a);
53261        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
53262        let e = _mm512_set_ps(
53263            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
53264        );
53265        assert_eq_m512(r, e);
53266    }
53267
53268    #[simd_test(enable = "avx512f")]
53269    unsafe fn test_mm512_maskz_unpacklo_ps() {
53270        let a = _mm512_set_ps(
53271            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
53272        );
53273        let b = _mm512_set_ps(
53274            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
53275        );
53276        let r = _mm512_maskz_unpacklo_ps(0, a, b);
53277        assert_eq_m512(r, _mm512_setzero_ps());
53278        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
53279        let e = _mm512_set_ps(
53280            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
53281        );
53282        assert_eq_m512(r, e);
53283    }
53284
53285    #[simd_test(enable = "avx512f,avx512vl")]
53286    unsafe fn test_mm256_mask_unpacklo_ps() {
53287        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53288        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53289        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
53290        assert_eq_m256(r, a);
53291        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
53292        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
53293        assert_eq_m256(r, e);
53294    }
53295
53296    #[simd_test(enable = "avx512f,avx512vl")]
53297    unsafe fn test_mm256_maskz_unpacklo_ps() {
53298        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
53299        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
53300        let r = _mm256_maskz_unpacklo_ps(0, a, b);
53301        assert_eq_m256(r, _mm256_setzero_ps());
53302        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
53303        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
53304        assert_eq_m256(r, e);
53305    }
53306
53307    #[simd_test(enable = "avx512f,avx512vl")]
53308    unsafe fn test_mm_mask_unpacklo_ps() {
53309        let a = _mm_set_ps(1., 2., 3., 4.);
53310        let b = _mm_set_ps(17., 18., 19., 20.);
53311        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
53312        assert_eq_m128(r, a);
53313        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
53314        let e = _mm_set_ps(19., 3., 20., 4.);
53315        assert_eq_m128(r, e);
53316    }
53317
53318    #[simd_test(enable = "avx512f,avx512vl")]
53319    unsafe fn test_mm_maskz_unpacklo_ps() {
53320        let a = _mm_set_ps(1., 2., 3., 4.);
53321        let b = _mm_set_ps(17., 18., 19., 20.);
53322        let r = _mm_maskz_unpacklo_ps(0, a, b);
53323        assert_eq_m128(r, _mm_setzero_ps());
53324        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
53325        let e = _mm_set_ps(19., 3., 20., 4.);
53326        assert_eq_m128(r, e);
53327    }
53328
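    // alignr_epi32 concatenates `a` (high) and `b` (low) into one double-width
    // vector, shifts it right by IMM8 32-bit elements and keeps the low half, so an
    // immediate of 0 returns `b`, and shifting the 512-bit form by the full 16
    // elements wraps around to `b` again.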
53329    #[simd_test(enable = "avx512f")]
53330    unsafe fn test_mm512_alignr_epi32() {
53331        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
53332        let b = _mm512_set_epi32(
53333            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
53334        );
53335        let r = _mm512_alignr_epi32::<0>(a, b);
53336        assert_eq_m512i(r, b);
53337        let r = _mm512_alignr_epi32::<16>(a, b);
53338        assert_eq_m512i(r, b);
53339        let r = _mm512_alignr_epi32::<1>(a, b);
53340        let e = _mm512_set_epi32(
53341            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
53342        );
53343        assert_eq_m512i(r, e);
53344    }
53345
53346    #[simd_test(enable = "avx512f")]
53347    unsafe fn test_mm512_mask_alignr_epi32() {
53348        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
53349        let b = _mm512_set_epi32(
53350            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
53351        );
53352        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
53353        assert_eq_m512i(r, a);
53354        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
53355        let e = _mm512_set_epi32(
53356            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
53357        );
53358        assert_eq_m512i(r, e);
53359    }
53360
53361    #[simd_test(enable = "avx512f")]
53362    unsafe fn test_mm512_maskz_alignr_epi32() {
53363        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
53364        let b = _mm512_set_epi32(
53365            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
53366        );
53367        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
53368        assert_eq_m512i(r, _mm512_setzero_si512());
53369        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
53370        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
53371        assert_eq_m512i(r, e);
53372    }
53373
53374    #[simd_test(enable = "avx512f,avx512vl")]
53375    unsafe fn test_mm256_alignr_epi32() {
53376        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
53377        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
53378        let r = _mm256_alignr_epi32::<0>(a, b);
53379        assert_eq_m256i(r, b);
53380        let r = _mm256_alignr_epi32::<1>(a, b);
53381        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
53382        assert_eq_m256i(r, e);
53383    }
53384
53385    #[simd_test(enable = "avx512f,avx512vl")]
53386    unsafe fn test_mm256_mask_alignr_epi32() {
53387        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
53388        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
53389        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
53390        assert_eq_m256i(r, a);
53391        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
53392        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
53393        assert_eq_m256i(r, e);
53394    }
53395
53396    #[simd_test(enable = "avx512f,avx512vl")]
53397    unsafe fn test_mm256_maskz_alignr_epi32() {
53398        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
53399        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
53400        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
53401        assert_eq_m256i(r, _mm256_setzero_si256());
53402        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
53403        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
53404        assert_eq_m256i(r, e);
53405    }
53406
53407    #[simd_test(enable = "avx512f,avx512vl")]
53408    unsafe fn test_mm_alignr_epi32() {
53409        let a = _mm_set_epi32(4, 3, 2, 1);
53410        let b = _mm_set_epi32(8, 7, 6, 5);
53411        let r = _mm_alignr_epi32::<0>(a, b);
53412        assert_eq_m128i(r, b);
53413        let r = _mm_alignr_epi32::<1>(a, b);
53414        let e = _mm_set_epi32(1, 8, 7, 6);
53415        assert_eq_m128i(r, e);
53416    }
53417
53418    #[simd_test(enable = "avx512f,avx512vl")]
53419    unsafe fn test_mm_mask_alignr_epi32() {
53420        let a = _mm_set_epi32(4, 3, 2, 1);
53421        let b = _mm_set_epi32(8, 7, 6, 5);
53422        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
53423        assert_eq_m128i(r, a);
53424        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
53425        let e = _mm_set_epi32(1, 8, 7, 6);
53426        assert_eq_m128i(r, e);
53427    }
53428
53429    #[simd_test(enable = "avx512f,avx512vl")]
53430    unsafe fn test_mm_maskz_alignr_epi32() {
53431        let a = _mm_set_epi32(4, 3, 2, 1);
53432        let b = _mm_set_epi32(8, 7, 6, 5);
53433        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
53434        assert_eq_m128i(r, _mm_setzero_si128());
53435        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
53436        let e = _mm_set_epi32(1, 8, 7, 6);
53437        assert_eq_m128i(r, e);
53438    }
53439
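    // The bitwise tests build operands from shifted bits so that `a & b` and `a | b`
    // have easily checkable per-element values; the masked variants then confirm that
    // unselected elements come from `src` (mask) or are zeroed (maskz).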
53440    #[simd_test(enable = "avx512f")]
53441    unsafe fn test_mm512_and_epi32() {
53442        #[rustfmt::skip]
53443        let a = _mm512_set_epi32(
53444            1 << 1 | 1 << 2, 0, 0, 0,
53445            0, 0, 0, 0,
53446            0, 0, 0, 0,
53447            0, 0, 0, 1 << 1 | 1 << 3,
53448        );
53449        #[rustfmt::skip]
53450        let b = _mm512_set_epi32(
53451            1 << 1, 0, 0, 0,
53452            0, 0, 0, 0,
53453            0, 0, 0, 0,
53454            0, 0, 0, 1 << 3 | 1 << 4,
53455        );
53456        let r = _mm512_and_epi32(a, b);
53457        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
53458        assert_eq_m512i(r, e);
53459    }
53460
53461    #[simd_test(enable = "avx512f")]
53462    unsafe fn test_mm512_mask_and_epi32() {
53463        #[rustfmt::skip]
53464        let a = _mm512_set_epi32(
53465            1 << 1 | 1 << 2, 0, 0, 0,
53466            0, 0, 0, 0,
53467            0, 0, 0, 0,
53468            0, 0, 0, 1 << 1 | 1 << 3,
53469        );
53470        #[rustfmt::skip]
53471        let b = _mm512_set_epi32(
53472            1 << 1, 0, 0, 0,
53473            0, 0, 0, 0,
53474            0, 0, 0, 0,
53475            0, 0, 0, 1 << 3 | 1 << 4,
53476        );
53477        let r = _mm512_mask_and_epi32(a, 0, a, b);
53478        assert_eq_m512i(r, a);
53479        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
53480        #[rustfmt::skip]
53481        let e = _mm512_set_epi32(
53482            1 << 1 | 1 << 2, 0, 0, 0,
53483            0, 0, 0, 0,
53484            0, 0, 0, 0,
53485            0, 0, 0, 1 << 3,
53486        );
53487        assert_eq_m512i(r, e);
53488    }
53489
53490    #[simd_test(enable = "avx512f")]
53491    unsafe fn test_mm512_maskz_and_epi32() {
53492        #[rustfmt::skip]
53493        let a = _mm512_set_epi32(
53494            1 << 1 | 1 << 2, 0, 0, 0,
53495            0, 0, 0, 0,
53496            0, 0, 0, 0,
53497            0, 0, 0, 1 << 1 | 1 << 3,
53498        );
53499        #[rustfmt::skip]
53500        let b = _mm512_set_epi32(
53501            1 << 1, 0, 0, 0,
53502            0, 0, 0, 0,
53503            0, 0, 0, 0,
53504            0, 0, 0, 1 << 3 | 1 << 4,
53505        );
53506        let r = _mm512_maskz_and_epi32(0, a, b);
53507        assert_eq_m512i(r, _mm512_setzero_si512());
53508        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
53509        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
53510        assert_eq_m512i(r, e);
53511    }
53512
53513    #[simd_test(enable = "avx512f,avx512vl")]
53514    unsafe fn test_mm256_mask_and_epi32() {
53515        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53516        let b = _mm256_set1_epi32(1 << 1);
53517        let r = _mm256_mask_and_epi32(a, 0, a, b);
53518        assert_eq_m256i(r, a);
53519        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
53520        let e = _mm256_set1_epi32(1 << 1);
53521        assert_eq_m256i(r, e);
53522    }
53523
53524    #[simd_test(enable = "avx512f,avx512vl")]
53525    unsafe fn test_mm256_maskz_and_epi32() {
53526        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53527        let b = _mm256_set1_epi32(1 << 1);
53528        let r = _mm256_maskz_and_epi32(0, a, b);
53529        assert_eq_m256i(r, _mm256_setzero_si256());
53530        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
53531        let e = _mm256_set1_epi32(1 << 1);
53532        assert_eq_m256i(r, e);
53533    }
53534
53535    #[simd_test(enable = "avx512f,avx512vl")]
53536    unsafe fn test_mm_mask_and_epi32() {
53537        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53538        let b = _mm_set1_epi32(1 << 1);
53539        let r = _mm_mask_and_epi32(a, 0, a, b);
53540        assert_eq_m128i(r, a);
53541        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
53542        let e = _mm_set1_epi32(1 << 1);
53543        assert_eq_m128i(r, e);
53544    }
53545
53546    #[simd_test(enable = "avx512f,avx512vl")]
53547    unsafe fn test_mm_maskz_and_epi32() {
53548        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53549        let b = _mm_set1_epi32(1 << 1);
53550        let r = _mm_maskz_and_epi32(0, a, b);
53551        assert_eq_m128i(r, _mm_setzero_si128());
53552        let r = _mm_maskz_and_epi32(0b00001111, a, b);
53553        let e = _mm_set1_epi32(1 << 1);
53554        assert_eq_m128i(r, e);
53555    }
53556
53557    #[simd_test(enable = "avx512f")]
53558    unsafe fn test_mm512_and_si512() {
53559        #[rustfmt::skip]
53560        let a = _mm512_set_epi32(
53561            1 << 1 | 1 << 2, 0, 0, 0,
53562            0, 0, 0, 0,
53563            0, 0, 0, 0,
53564            0, 0, 0, 1 << 1 | 1 << 3,
53565        );
53566        #[rustfmt::skip]
53567        let b = _mm512_set_epi32(
53568            1 << 1, 0, 0, 0,
53569            0, 0, 0, 0,
53570            0, 0, 0, 0,
53571            0, 0, 0, 1 << 3 | 1 << 4,
53572        );
53573        let r = _mm512_and_si512(a, b);
53574        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
53575        assert_eq_m512i(r, e);
53576    }
53577
53578    #[simd_test(enable = "avx512f")]
53579    unsafe fn test_mm512_or_epi32() {
53580        #[rustfmt::skip]
53581        let a = _mm512_set_epi32(
53582            1 << 1 | 1 << 2, 0, 0, 0,
53583            0, 0, 0, 0,
53584            0, 0, 0, 0,
53585            0, 0, 0, 1 << 1 | 1 << 3,
53586        );
53587        #[rustfmt::skip]
53588        let b = _mm512_set_epi32(
53589            1 << 1, 0, 0, 0,
53590            0, 0, 0, 0,
53591            0, 0, 0, 0,
53592            0, 0, 0, 1 << 3 | 1 << 4,
53593        );
53594        let r = _mm512_or_epi32(a, b);
53595        #[rustfmt::skip]
53596        let e = _mm512_set_epi32(
53597            1 << 1 | 1 << 2, 0, 0, 0,
53598            0, 0, 0, 0,
53599            0, 0, 0, 0,
53600            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53601        );
53602        assert_eq_m512i(r, e);
53603    }
53604
53605    #[simd_test(enable = "avx512f")]
53606    unsafe fn test_mm512_mask_or_epi32() {
53607        #[rustfmt::skip]
53608        let a = _mm512_set_epi32(
53609            1 << 1 | 1 << 2, 0, 0, 0,
53610            0, 0, 0, 0,
53611            0, 0, 0, 0,
53612            0, 0, 0, 1 << 1 | 1 << 3,
53613        );
53614        #[rustfmt::skip]
53615        let b = _mm512_set_epi32(
53616            1 << 1, 0, 0, 0,
53617            0, 0, 0, 0,
53618            0, 0, 0, 0,
53619            0, 0, 0, 1 << 3 | 1 << 4,
53620        );
53621        let r = _mm512_mask_or_epi32(a, 0, a, b);
53622        assert_eq_m512i(r, a);
53623        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
53624        #[rustfmt::skip]
53625        let e = _mm512_set_epi32(
53626            1 << 1 | 1 << 2, 0, 0, 0,
53627            0, 0, 0, 0,
53628            0, 0, 0, 0,
53629            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53630        );
53631        assert_eq_m512i(r, e);
53632    }
53633
53634    #[simd_test(enable = "avx512f")]
53635    unsafe fn test_mm512_maskz_or_epi32() {
53636        #[rustfmt::skip]
53637        let a = _mm512_set_epi32(
53638            1 << 1 | 1 << 2, 0, 0, 0,
53639            0, 0, 0, 0,
53640            0, 0, 0, 0,
53641            0, 0, 0, 1 << 1 | 1 << 3,
53642        );
53643        #[rustfmt::skip]
53644        let b = _mm512_set_epi32(
53645            1 << 1, 0, 0, 0,
53646            0, 0, 0, 0,
53647            0, 0, 0, 0,
53648            0, 0, 0, 1 << 3 | 1 << 4,
53649        );
53650        let r = _mm512_maskz_or_epi32(0, a, b);
53651        assert_eq_m512i(r, _mm512_setzero_si512());
53652        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
53653        #[rustfmt::skip]
53654        let e = _mm512_set_epi32(
53655            0, 0, 0, 0,
53656            0, 0, 0, 0,
53657            0, 0, 0, 0,
53658            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53659        );
53660        assert_eq_m512i(r, e);
53661    }
53662
53663    #[simd_test(enable = "avx512f,avx512vl")]
53664    unsafe fn test_mm256_or_epi32() {
53665        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53666        let b = _mm256_set1_epi32(1 << 1);
53667        let r = _mm256_or_epi32(a, b);
53668        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
53669        assert_eq_m256i(r, e);
53670    }
53671
53672    #[simd_test(enable = "avx512f,avx512vl")]
53673    unsafe fn test_mm256_mask_or_epi32() {
53674        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53675        let b = _mm256_set1_epi32(1 << 1);
53676        let r = _mm256_mask_or_epi32(a, 0, a, b);
53677        assert_eq_m256i(r, a);
53678        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
53679        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
53680        assert_eq_m256i(r, e);
53681    }
53682
53683    #[simd_test(enable = "avx512f,avx512vl")]
53684    unsafe fn test_mm256_maskz_or_epi32() {
53685        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53686        let b = _mm256_set1_epi32(1 << 1);
53687        let r = _mm256_maskz_or_epi32(0, a, b);
53688        assert_eq_m256i(r, _mm256_setzero_si256());
53689        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
53690        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
53691        assert_eq_m256i(r, e);
53692    }
53693
53694    #[simd_test(enable = "avx512f,avx512vl")]
53695    unsafe fn test_mm_or_epi32() {
53696        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53697        let b = _mm_set1_epi32(1 << 1);
53698        let r = _mm_or_epi32(a, b);
53699        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
53700        assert_eq_m128i(r, e);
53701    }
53702
53703    #[simd_test(enable = "avx512f,avx512vl")]
53704    unsafe fn test_mm_mask_or_epi32() {
53705        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53706        let b = _mm_set1_epi32(1 << 1);
53707        let r = _mm_mask_or_epi32(a, 0, a, b);
53708        assert_eq_m128i(r, a);
53709        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
53710        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
53711        assert_eq_m128i(r, e);
53712    }
53713
53714    #[simd_test(enable = "avx512f,avx512vl")]
53715    unsafe fn test_mm_maskz_or_epi32() {
53716        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53717        let b = _mm_set1_epi32(1 << 1);
53718        let r = _mm_maskz_or_epi32(0, a, b);
53719        assert_eq_m128i(r, _mm_setzero_si128());
53720        let r = _mm_maskz_or_epi32(0b00001111, a, b);
53721        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
53722        assert_eq_m128i(r, e);
53723    }
53724
53725    #[simd_test(enable = "avx512f")]
53726    unsafe fn test_mm512_or_si512() {
53727        #[rustfmt::skip]
53728        let a = _mm512_set_epi32(
53729            1 << 1 | 1 << 2, 0, 0, 0,
53730            0, 0, 0, 0,
53731            0, 0, 0, 0,
53732            0, 0, 0, 1 << 1 | 1 << 3,
53733        );
53734        #[rustfmt::skip]
53735        let b = _mm512_set_epi32(
53736            1 << 1, 0, 0, 0,
53737            0, 0, 0, 0,
53738            0, 0, 0, 0,
53739            0, 0, 0, 1 << 3 | 1 << 4,
53740        );
53741        let r = _mm512_or_si512(a, b);
53742        #[rustfmt::skip]
53743        let e = _mm512_set_epi32(
53744            1 << 1 | 1 << 2, 0, 0, 0,
53745            0, 0, 0, 0,
53746            0, 0, 0, 0,
53747            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
53748        );
53749        assert_eq_m512i(r, e);
53750    }
53751
53752    #[simd_test(enable = "avx512f")]
53753    unsafe fn test_mm512_xor_epi32() {
53754        #[rustfmt::skip]
53755        let a = _mm512_set_epi32(
53756            1 << 1 | 1 << 2, 0, 0, 0,
53757            0, 0, 0, 0,
53758            0, 0, 0, 0,
53759            0, 0, 0, 1 << 1 | 1 << 3,
53760        );
53761        #[rustfmt::skip]
53762        let b = _mm512_set_epi32(
53763            1 << 1, 0, 0, 0,
53764            0, 0, 0, 0,
53765            0, 0, 0, 0,
53766            0, 0, 0, 1 << 3 | 1 << 4,
53767        );
53768        let r = _mm512_xor_epi32(a, b);
53769        #[rustfmt::skip]
53770        let e = _mm512_set_epi32(
53771            1 << 2, 0, 0, 0,
53772            0, 0, 0, 0,
53773            0, 0, 0, 0,
53774            0, 0, 0, 1 << 1 | 1 << 4,
53775        );
53776        assert_eq_m512i(r, e);
53777    }
53778
53779    #[simd_test(enable = "avx512f")]
53780    unsafe fn test_mm512_mask_xor_epi32() {
53781        #[rustfmt::skip]
53782        let a = _mm512_set_epi32(
53783            1 << 1 | 1 << 2, 0, 0, 0,
53784            0, 0, 0, 0,
53785            0, 0, 0, 0,
53786            0, 0, 0, 1 << 1 | 1 << 3,
53787        );
53788        #[rustfmt::skip]
53789        let b = _mm512_set_epi32(
53790            1 << 1, 0, 0, 0,
53791            0, 0, 0, 0,
53792            0, 0, 0, 0,
53793            0, 0, 0, 1 << 3 | 1 << 4,
53794        );
53795        let r = _mm512_mask_xor_epi32(a, 0, a, b);
53796        assert_eq_m512i(r, a);
53797        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
53798        #[rustfmt::skip]
53799        let e = _mm512_set_epi32(
53800            1 << 1 | 1 << 2, 0, 0, 0,
53801            0, 0, 0, 0,
53802            0, 0, 0, 0,
53803            0, 0, 0, 1 << 1 | 1 << 4,
53804        );
53805        assert_eq_m512i(r, e);
53806    }
53807
53808    #[simd_test(enable = "avx512f")]
53809    unsafe fn test_mm512_maskz_xor_epi32() {
53810        #[rustfmt::skip]
53811        let a = _mm512_set_epi32(
53812            1 << 1 | 1 << 2, 0, 0, 0,
53813            0, 0, 0, 0,
53814            0, 0, 0, 0,
53815            0, 0, 0, 1 << 1 | 1 << 3,
53816        );
53817        #[rustfmt::skip]
53818        let b = _mm512_set_epi32(
53819            1 << 1, 0, 0, 0,
53820            0, 0, 0, 0,
53821            0, 0, 0, 0,
53822            0, 0, 0, 1 << 3 | 1 << 4,
53823        );
53824        let r = _mm512_maskz_xor_epi32(0, a, b);
53825        assert_eq_m512i(r, _mm512_setzero_si512());
53826        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
53827        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
53828        assert_eq_m512i(r, e);
53829    }
53830
53831    #[simd_test(enable = "avx512f,avx512vl")]
53832    unsafe fn test_mm256_xor_epi32() {
53833        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53834        let b = _mm256_set1_epi32(1 << 1);
53835        let r = _mm256_xor_epi32(a, b);
53836        let e = _mm256_set1_epi32(1 << 2);
53837        assert_eq_m256i(r, e);
53838    }
53839
53840    #[simd_test(enable = "avx512f,avx512vl")]
53841    unsafe fn test_mm256_mask_xor_epi32() {
53842        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53843        let b = _mm256_set1_epi32(1 << 1);
53844        let r = _mm256_mask_xor_epi32(a, 0, a, b);
53845        assert_eq_m256i(r, a);
53846        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
53847        let e = _mm256_set1_epi32(1 << 2);
53848        assert_eq_m256i(r, e);
53849    }
53850
53851    #[simd_test(enable = "avx512f,avx512vl")]
53852    unsafe fn test_mm256_maskz_xor_epi32() {
53853        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53854        let b = _mm256_set1_epi32(1 << 1);
53855        let r = _mm256_maskz_xor_epi32(0, a, b);
53856        assert_eq_m256i(r, _mm256_setzero_si256());
53857        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
53858        let e = _mm256_set1_epi32(1 << 2);
53859        assert_eq_m256i(r, e);
53860    }
53861
53862    #[simd_test(enable = "avx512f,avx512vl")]
53863    unsafe fn test_mm_xor_epi32() {
53864        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53865        let b = _mm_set1_epi32(1 << 1);
53866        let r = _mm_xor_epi32(a, b);
53867        let e = _mm_set1_epi32(1 << 2);
53868        assert_eq_m128i(r, e);
53869    }
53870
53871    #[simd_test(enable = "avx512f,avx512vl")]
53872    unsafe fn test_mm_mask_xor_epi32() {
53873        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53874        let b = _mm_set1_epi32(1 << 1);
53875        let r = _mm_mask_xor_epi32(a, 0, a, b);
53876        assert_eq_m128i(r, a);
53877        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
53878        let e = _mm_set1_epi32(1 << 2);
53879        assert_eq_m128i(r, e);
53880    }
53881
53882    #[simd_test(enable = "avx512f,avx512vl")]
53883    unsafe fn test_mm_maskz_xor_epi32() {
53884        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53885        let b = _mm_set1_epi32(1 << 1);
53886        let r = _mm_maskz_xor_epi32(0, a, b);
53887        assert_eq_m128i(r, _mm_setzero_si128());
53888        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
53889        let e = _mm_set1_epi32(1 << 2);
53890        assert_eq_m128i(r, e);
53891    }
53892
53893    #[simd_test(enable = "avx512f")]
53894    unsafe fn test_mm512_xor_si512() {
53895        #[rustfmt::skip]
53896        let a = _mm512_set_epi32(
53897            1 << 1 | 1 << 2, 0, 0, 0,
53898            0, 0, 0, 0,
53899            0, 0, 0, 0,
53900            0, 0, 0, 1 << 1 | 1 << 3,
53901        );
53902        #[rustfmt::skip]
53903        let b = _mm512_set_epi32(
53904            1 << 1, 0, 0, 0,
53905            0, 0, 0, 0,
53906            0, 0, 0, 0,
53907            0, 0, 0, 1 << 3 | 1 << 4,
53908        );
53909        let r = _mm512_xor_si512(a, b);
53910        #[rustfmt::skip]
53911        let e = _mm512_set_epi32(
53912            1 << 2, 0, 0, 0,
53913            0, 0, 0, 0,
53914            0, 0, 0, 0,
53915            0, 0, 0, 1 << 1 | 1 << 4,
53916        );
53917        assert_eq_m512i(r, e);
53918    }
53919
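    // `andnot` computes `(!a) & b` (VPANDND). With `a = 1 << 1 | 1 << 2` and
    // `b = 1 << 3 | 1 << 4` the operands share no bits, so the result is simply `b`.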
53920    #[simd_test(enable = "avx512f")]
53921    unsafe fn test_mm512_andnot_epi32() {
53922        let a = _mm512_set1_epi32(0);
53923        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
53924        let r = _mm512_andnot_epi32(a, b);
53925        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
53926        assert_eq_m512i(r, e);
53927    }
53928
53929    #[simd_test(enable = "avx512f")]
53930    unsafe fn test_mm512_mask_andnot_epi32() {
53931        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
53932        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
53933        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
53934        assert_eq_m512i(r, a);
53935        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
53936        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
53937        assert_eq_m512i(r, e);
53938    }
53939
53940    #[simd_test(enable = "avx512f")]
53941    unsafe fn test_mm512_maskz_andnot_epi32() {
53942        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
53943        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
53944        let r = _mm512_maskz_andnot_epi32(0, a, b);
53945        assert_eq_m512i(r, _mm512_setzero_si512());
53946        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
53947        #[rustfmt::skip]
53948        let e = _mm512_set_epi32(
53949            0, 0, 0, 0,
53950            0, 0, 0, 0,
53951            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
53952            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
53953        );
53954        assert_eq_m512i(r, e);
53955    }
53956
53957    #[simd_test(enable = "avx512f,avx512vl")]
53958    unsafe fn test_mm256_mask_andnot_epi32() {
53959        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53960        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
53961        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
53962        assert_eq_m256i(r, a);
53963        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
53964        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
53965        assert_eq_m256i(r, e);
53966    }
53967
53968    #[simd_test(enable = "avx512f,avx512vl")]
53969    unsafe fn test_mm256_maskz_andnot_epi32() {
53970        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
53971        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
53972        let r = _mm256_maskz_andnot_epi32(0, a, b);
53973        assert_eq_m256i(r, _mm256_setzero_si256());
53974        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
53975        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
53976        assert_eq_m256i(r, e);
53977    }
53978
53979    #[simd_test(enable = "avx512f,avx512vl")]
53980    unsafe fn test_mm_mask_andnot_epi32() {
53981        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53982        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
53983        let r = _mm_mask_andnot_epi32(a, 0, a, b);
53984        assert_eq_m128i(r, a);
53985        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
53986        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
53987        assert_eq_m128i(r, e);
53988    }
53989
53990    #[simd_test(enable = "avx512f,avx512vl")]
53991    unsafe fn test_mm_maskz_andnot_epi32() {
53992        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
53993        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
53994        let r = _mm_maskz_andnot_epi32(0, a, b);
53995        assert_eq_m128i(r, _mm_setzero_si128());
53996        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
53997        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
53998        assert_eq_m128i(r, e);
53999    }
54000
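    // The following tests exercise the opmask (`k` register) helpers: conversions
    // between masks and integers, and the KAND/KOR/KXOR/KNOT/KANDN/KXNOR operations.
    // A `__mmask16` is just a 16-bit integer, so the expected values are the plain
    // bitwise results on `u16`.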
54001    #[simd_test(enable = "avx512f")]
54002    unsafe fn test_cvtmask16_u32() {
54003        let a: __mmask16 = 0b11001100_00110011;
54004        let r = _cvtmask16_u32(a);
54005        let e: u32 = 0b11001100_00110011;
54006        assert_eq!(r, e);
54007    }
54008
54009    #[simd_test(enable = "avx512f")]
54010    unsafe fn test_cvtu32_mask16() {
54011        let a: u32 = 0b11001100_00110011;
54012        let r = _cvtu32_mask16(a);
54013        let e: __mmask16 = 0b11001100_00110011;
54014        assert_eq!(r, e);
54015    }
54016
54017    #[simd_test(enable = "avx512f")]
54018    unsafe fn test_mm512_kand() {
54019        let a: u16 = 0b11001100_00110011;
54020        let b: u16 = 0b11001100_00110011;
54021        let r = _mm512_kand(a, b);
54022        let e: u16 = 0b11001100_00110011;
54023        assert_eq!(r, e);
54024    }
54025
54026    #[simd_test(enable = "avx512f")]
54027    unsafe fn test_kand_mask16() {
54028        let a: u16 = 0b11001100_00110011;
54029        let b: u16 = 0b11001100_00110011;
54030        let r = _kand_mask16(a, b);
54031        let e: u16 = 0b11001100_00110011;
54032        assert_eq!(r, e);
54033    }
54034
54035    #[simd_test(enable = "avx512f")]
54036    unsafe fn test_mm512_kor() {
54037        let a: u16 = 0b11001100_00110011;
54038        let b: u16 = 0b00101110_00001011;
54039        let r = _mm512_kor(a, b);
54040        let e: u16 = 0b11101110_00111011;
54041        assert_eq!(r, e);
54042    }
54043
54044    #[simd_test(enable = "avx512f")]
54045    unsafe fn test_kor_mask16() {
54046        let a: u16 = 0b11001100_00110011;
54047        let b: u16 = 0b00101110_00001011;
54048        let r = _kor_mask16(a, b);
54049        let e: u16 = 0b11101110_00111011;
54050        assert_eq!(r, e);
54051    }
54052
54053    #[simd_test(enable = "avx512f")]
54054    unsafe fn test_mm512_kxor() {
54055        let a: u16 = 0b11001100_00110011;
54056        let b: u16 = 0b00101110_00001011;
54057        let r = _mm512_kxor(a, b);
54058        let e: u16 = 0b11100010_00111000;
54059        assert_eq!(r, e);
54060    }
54061
54062    #[simd_test(enable = "avx512f")]
54063    unsafe fn test_kxor_mask16() {
54064        let a: u16 = 0b11001100_00110011;
54065        let b: u16 = 0b00101110_00001011;
54066        let r = _kxor_mask16(a, b);
54067        let e: u16 = 0b11100010_00111000;
54068        assert_eq!(r, e);
54069    }
54070
54071    #[simd_test(enable = "avx512f")]
54072    unsafe fn test_mm512_knot() {
54073        let a: u16 = 0b11001100_00110011;
54074        let r = _mm512_knot(a);
54075        let e: u16 = 0b00110011_11001100;
54076        assert_eq!(r, e);
54077    }
54078
54079    #[simd_test(enable = "avx512f")]
54080    unsafe fn test_knot_mask16() {
54081        let a: u16 = 0b11001100_00110011;
54082        let r = _knot_mask16(a);
54083        let e: u16 = 0b00110011_11001100;
54084        assert_eq!(r, e);
54085    }
54086
54087    #[simd_test(enable = "avx512f")]
54088    unsafe fn test_mm512_kandn() {
54089        let a: u16 = 0b11001100_00110011;
54090        let b: u16 = 0b00101110_00001011;
54091        let r = _mm512_kandn(a, b);
54092        let e: u16 = 0b00100010_00001000;
54093        assert_eq!(r, e);
54094    }
54095
54096    #[simd_test(enable = "avx512f")]
54097    unsafe fn test_kandn_mask16() {
54098        let a: u16 = 0b11001100_00110011;
54099        let b: u16 = 0b00101110_00001011;
54100        let r = _kandn_mask16(a, b);
54101        let e: u16 = 0b00100010_00001000;
54102        assert_eq!(r, e);
54103    }
54104
54105    #[simd_test(enable = "avx512f")]
54106    unsafe fn test_mm512_kxnor() {
54107        let a: u16 = 0b11001100_00110011;
54108        let b: u16 = 0b00101110_00001011;
54109        let r = _mm512_kxnor(a, b);
54110        let e: u16 = 0b00011101_11000111;
54111        assert_eq!(r, e);
54112    }
54113
54114    #[simd_test(enable = "avx512f")]
54115    unsafe fn test_kxnor_mask16() {
54116        let a: u16 = 0b11001100_00110011;
54117        let b: u16 = 0b00101110_00001011;
54118        let r = _kxnor_mask16(a, b);
54119        let e: u16 = 0b00011101_11000111;
54120        assert_eq!(r, e);
54121    }
54122
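    // `kortest` ORs two masks and reports two flags: `_kortest_mask16_u8` returns 1
    // only if the OR is all zeros and sets its `all_ones` out-parameter to 1 only if
    // the OR is all ones. Here `a | b == 0xFFFF`, so the zero flag is 0 and the
    // all-ones flag is 1; `_kortestc` and `_kortestz` return those flags individually.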
54123    #[simd_test(enable = "avx512dq")]
54124    unsafe fn test_kortest_mask16_u8() {
54125        let a: __mmask16 = 0b0110100101101001;
54126        let b: __mmask16 = 0b1011011010110110;
54127        let mut all_ones: u8 = 0;
54128        let r = _kortest_mask16_u8(a, b, &mut all_ones);
54129        assert_eq!(r, 0);
54130        assert_eq!(all_ones, 1);
54131    }
54132
54133    #[simd_test(enable = "avx512dq")]
54134    unsafe fn test_kortestc_mask16_u8() {
54135        let a: __mmask16 = 0b0110100101101001;
54136        let b: __mmask16 = 0b1011011010110110;
54137        let r = _kortestc_mask16_u8(a, b);
54138        assert_eq!(r, 1);
54139    }
54140
54141    #[simd_test(enable = "avx512dq")]
54142    unsafe fn test_kortestz_mask16_u8() {
54143        let a: __mmask16 = 0b0110100101101001;
54144        let b: __mmask16 = 0b1011011010110110;
54145        let r = _kortestz_mask16_u8(a, b);
54146        assert_eq!(r, 0);
54147    }
54148
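    // `kshiftli`/`kshiftri` shift the 16-bit mask by an immediate, zero-filling and
    // discarding bits shifted out of the low 16 bits,
    // e.g. 0b1001011011000011 << 3 == 0b1011011000011000 (mod 2^16).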
54149    #[simd_test(enable = "avx512dq")]
54150    unsafe fn test_kshiftli_mask16() {
54151        let a: __mmask16 = 0b1001011011000011;
54152        let r = _kshiftli_mask16::<3>(a);
54153        let e: __mmask16 = 0b1011011000011000;
54154        assert_eq!(r, e);
54155    }
54156
54157    #[simd_test(enable = "avx512dq")]
54158    unsafe fn test_kshiftri_mask16() {
54159        let a: __mmask16 = 0b0110100100111100;
54160        let r = _kshiftri_mask16::<3>(a);
54161        let e: __mmask16 = 0b0000110100100111;
54162        assert_eq!(r, e);
54163    }
54164
54165    #[simd_test(enable = "avx512f")]
54166    unsafe fn test_load_mask16() {
54167        let a: __mmask16 = 0b1001011011000011;
54168        let r = _load_mask16(&a);
54169        let e: __mmask16 = 0b1001011011000011;
54170        assert_eq!(r, e);
54171    }
54172
54173    #[simd_test(enable = "avx512f")]
54174    unsafe fn test_store_mask16() {
54175        let a: __mmask16 = 0b0110100100111100;
54176        let mut r = 0;
54177        _store_mask16(&mut r, a);
54178        let e: __mmask16 = 0b0110100100111100;
54179        assert_eq!(r, e);
54180    }
54181
54182    #[simd_test(enable = "avx512f")]
54183    unsafe fn test_mm512_kmov() {
54184        let a: u16 = 0b11001100_00110011;
54185        let r = _mm512_kmov(a);
54186        let e: u16 = 0b11001100_00110011;
54187        assert_eq!(r, e);
54188    }
54189
54190    #[simd_test(enable = "avx512f")]
54191    unsafe fn test_mm512_int2mask() {
54192        let a: i32 = 0b11001100_00110011;
54193        let r = _mm512_int2mask(a);
54194        let e: u16 = 0b11001100_00110011;
54195        assert_eq!(r, e);
54196    }
54197
54198    #[simd_test(enable = "avx512f")]
54199    unsafe fn test_mm512_mask2int() {
54200        let k1: __mmask16 = 0b11001100_00110011;
54201        let r = _mm512_mask2int(k1);
54202        let e: i32 = 0b11001100_00110011;
54203        assert_eq!(r, e);
54204    }
54205
54206    #[simd_test(enable = "avx512f")]
54207    unsafe fn test_mm512_kunpackb() {
54208        let a: u16 = 0b11001100_00110011;
54209        let b: u16 = 0b00101110_00001011;
54210        let r = _mm512_kunpackb(a, b);
54211        let e: u16 = 0b00110011_00001011;
54212        assert_eq!(r, e);
54213    }
54214
54215    #[simd_test(enable = "avx512f")]
54216    unsafe fn test_mm512_kortestc() {
54217        let a: u16 = 0b11001100_00110011;
54218        let b: u16 = 0b00101110_00001011;
54219        let r = _mm512_kortestc(a, b);
54220        assert_eq!(r, 0);
54221        let b: u16 = 0b11111111_11111111;
54222        let r = _mm512_kortestc(a, b);
54223        assert_eq!(r, 1);
54224    }
54225
54226    #[simd_test(enable = "avx512f")]
54227    unsafe fn test_mm512_kortestz() {
54228        let a: u16 = 0b11001100_00110011;
54229        let b: u16 = 0b00101110_00001011;
54230        let r = _mm512_kortestz(a, b);
54231        assert_eq!(r, 0);
54232        let r = _mm512_kortestz(0, 0);
54233        assert_eq!(r, 1);
54234    }
54235
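    // `test_epi32_mask` sets mask bit i when `a[i] & b[i] != 0` (VPTESTMD), while
    // `testn_epi32_mask` sets it when that AND is zero (VPTESTNMD). The masked
    // variants additionally AND the result with the input mask.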
54236    #[simd_test(enable = "avx512f")]
54237    unsafe fn test_mm512_test_epi32_mask() {
54238        let a = _mm512_set1_epi32(1 << 0);
54239        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
54240        let r = _mm512_test_epi32_mask(a, b);
54241        let e: __mmask16 = 0b11111111_11111111;
54242        assert_eq!(r, e);
54243    }
54244
54245    #[simd_test(enable = "avx512f")]
54246    unsafe fn test_mm512_mask_test_epi32_mask() {
54247        let a = _mm512_set1_epi32(1 << 0);
54248        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
54249        let r = _mm512_mask_test_epi32_mask(0, a, b);
54250        assert_eq!(r, 0);
54251        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
54252        let e: __mmask16 = 0b11111111_11111111;
54253        assert_eq!(r, e);
54254    }
54255
54256    #[simd_test(enable = "avx512f,avx512vl")]
54257    unsafe fn test_mm256_test_epi32_mask() {
54258        let a = _mm256_set1_epi32(1 << 0);
54259        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
54260        let r = _mm256_test_epi32_mask(a, b);
54261        let e: __mmask8 = 0b11111111;
54262        assert_eq!(r, e);
54263    }
54264
54265    #[simd_test(enable = "avx512f,avx512vl")]
54266    unsafe fn test_mm256_mask_test_epi32_mask() {
54267        let a = _mm256_set1_epi32(1 << 0);
54268        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
54269        let r = _mm256_mask_test_epi32_mask(0, a, b);
54270        assert_eq!(r, 0);
54271        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
54272        let e: __mmask8 = 0b11111111;
54273        assert_eq!(r, e);
54274    }
54275
54276    #[simd_test(enable = "avx512f,avx512vl")]
54277    unsafe fn test_mm_test_epi32_mask() {
54278        let a = _mm_set1_epi32(1 << 0);
54279        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
54280        let r = _mm_test_epi32_mask(a, b);
54281        let e: __mmask8 = 0b00001111;
54282        assert_eq!(r, e);
54283    }
54284
54285    #[simd_test(enable = "avx512f,avx512vl")]
54286    unsafe fn test_mm_mask_test_epi32_mask() {
54287        let a = _mm_set1_epi32(1 << 0);
54288        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
54289        let r = _mm_mask_test_epi32_mask(0, a, b);
54290        assert_eq!(r, 0);
54291        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
54292        let e: __mmask8 = 0b00001111;
54293        assert_eq!(r, e);
54294    }
54295
54296    #[simd_test(enable = "avx512f")]
54297    unsafe fn test_mm512_testn_epi32_mask() {
54298        let a = _mm512_set1_epi32(1 << 0);
54299        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
54300        let r = _mm512_testn_epi32_mask(a, b);
54301        let e: __mmask16 = 0b00000000_00000000;
54302        assert_eq!(r, e);
54303    }
54304
54305    #[simd_test(enable = "avx512f")]
54306    unsafe fn test_mm512_mask_testn_epi32_mask() {
54307        let a = _mm512_set1_epi32(1 << 0);
54308        let b = _mm512_set1_epi32(1 << 1);
54309        let r = _mm512_mask_testn_epi32_mask(0, a, b);
54310        assert_eq!(r, 0);
54311        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
54312        let e: __mmask16 = 0b11111111_11111111;
54313        assert_eq!(r, e);
54314    }
54315
54316    #[simd_test(enable = "avx512f,avx512vl")]
54317    unsafe fn test_mm256_testn_epi32_mask() {
54318        let a = _mm256_set1_epi32(1 << 0);
54319        let b = _mm256_set1_epi32(1 << 1);
54320        let r = _mm256_testn_epi32_mask(a, b);
54321        let e: __mmask8 = 0b11111111;
54322        assert_eq!(r, e);
54323    }
54324
54325    #[simd_test(enable = "avx512f,avx512vl")]
54326    unsafe fn test_mm256_mask_testn_epi32_mask() {
54327        let a = _mm256_set1_epi32(1 << 0);
54328        let b = _mm256_set1_epi32(1 << 1);
54329        let r = _mm256_mask_testn_epi32_mask(0, a, b);
54330        assert_eq!(r, 0);
54331        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
54332        let e: __mmask8 = 0b11111111;
54333        assert_eq!(r, e);
54334    }
54335
54336    #[simd_test(enable = "avx512f,avx512vl")]
54337    unsafe fn test_mm_testn_epi32_mask() {
54338        let a = _mm_set1_epi32(1 << 0);
54339        let b = _mm_set1_epi32(1 << 1);
54340        let r = _mm_testn_epi32_mask(a, b);
54341        let e: __mmask8 = 0b00001111;
54342        assert_eq!(r, e);
54343    }
54344
54345    #[simd_test(enable = "avx512f,avx512vl")]
54346    unsafe fn test_mm_mask_testn_epi32_mask() {
54347        let a = _mm_set1_epi32(1 << 0);
54348        let b = _mm_set1_epi32(1 << 1);
54349        let r = _mm_mask_testn_epi32_mask(0, a, b);
54350        assert_eq!(r, 0);
54351        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
54352        let e: __mmask8 = 0b00001111;
54353        assert_eq!(r, e);
54354    }
54355
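    // The stream (non-temporal) stores require a 64-byte-aligned destination, hence
    // the `#[repr(align(64))]` wrappers. These tests are skipped under Miri,
    // presumably because it does not model non-temporal memory accesses.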
54356    #[simd_test(enable = "avx512f")]
54357    #[cfg_attr(miri, ignore)]
54358    unsafe fn test_mm512_stream_ps() {
54359        #[repr(align(64))]
54360        struct Memory {
54361            pub data: [f32; 16], // 64 bytes
54362        }
54363        let a = _mm512_set1_ps(7.0);
54364        let mut mem = Memory { data: [-1.0; 16] };
54365
54366        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
54367        for i in 0..16 {
54368            assert_eq!(mem.data[i], get_m512(a, i));
54369        }
54370    }
54371
54372    #[simd_test(enable = "avx512f")]
54373    #[cfg_attr(miri, ignore)]
54374    unsafe fn test_mm512_stream_pd() {
54375        #[repr(align(64))]
54376        struct Memory {
54377            pub data: [f64; 8],
54378        }
54379        let a = _mm512_set1_pd(7.0);
54380        let mut mem = Memory { data: [-1.0; 8] };
54381
54382        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
54383        for i in 0..8 {
54384            assert_eq!(mem.data[i], get_m512d(a, i));
54385        }
54386    }
54387
54388    #[simd_test(enable = "avx512f")]
54389    #[cfg_attr(miri, ignore)]
54390    unsafe fn test_mm512_stream_si512() {
54391        #[repr(align(64))]
54392        struct Memory {
54393            pub data: [i64; 8],
54394        }
54395        let a = _mm512_set1_epi32(7);
54396        let mut mem = Memory { data: [-1; 8] };
54397
54398        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
54399        for i in 0..8 {
54400            assert_eq!(mem.data[i], get_m512i(a, i));
54401        }
54402    }
54403
54404    #[simd_test(enable = "avx512f")]
54405    unsafe fn test_mm512_stream_load_si512() {
54406        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
54407        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
54408        assert_eq_m512i(a, r);
54409    }
54410
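    // The reduce intrinsics fold all lanes into a scalar. In the masked variants
    // inactive lanes are ignored, which is equivalent to contributing the operation's
    // identity element (0 for add/or, 1 for mul, all-ones for and), so adding sixteen
    // 1s under an 8-bit-wide mask yields 8.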
54411    #[simd_test(enable = "avx512f")]
54412    unsafe fn test_mm512_reduce_add_epi32() {
54413        let a = _mm512_set1_epi32(1);
54414        let e: i32 = _mm512_reduce_add_epi32(a);
54415        assert_eq!(16, e);
54416    }
54417
54418    #[simd_test(enable = "avx512f")]
54419    unsafe fn test_mm512_mask_reduce_add_epi32() {
54420        let a = _mm512_set1_epi32(1);
54421        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
54422        assert_eq!(8, e);
54423    }
54424
54425    #[simd_test(enable = "avx512f")]
54426    unsafe fn test_mm512_reduce_add_ps() {
54427        let a = _mm512_set1_ps(1.);
54428        let e: f32 = _mm512_reduce_add_ps(a);
54429        assert_eq!(16., e);
54430    }
54431
54432    #[simd_test(enable = "avx512f")]
54433    unsafe fn test_mm512_mask_reduce_add_ps() {
54434        let a = _mm512_set1_ps(1.);
54435        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
54436        assert_eq!(8., e);
54437    }
54438
54439    #[simd_test(enable = "avx512f")]
54440    unsafe fn test_mm512_reduce_mul_epi32() {
54441        let a = _mm512_set1_epi32(2);
54442        let e: i32 = _mm512_reduce_mul_epi32(a);
54443        assert_eq!(65536, e);
54444    }
54445
54446    #[simd_test(enable = "avx512f")]
54447    unsafe fn test_mm512_mask_reduce_mul_epi32() {
54448        let a = _mm512_set1_epi32(2);
54449        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
54450        assert_eq!(256, e);
54451    }
54452
54453    #[simd_test(enable = "avx512f")]
54454    unsafe fn test_mm512_reduce_mul_ps() {
54455        let a = _mm512_set1_ps(2.);
54456        let e: f32 = _mm512_reduce_mul_ps(a);
54457        assert_eq!(65536., e);
54458    }
54459
54460    #[simd_test(enable = "avx512f")]
54461    unsafe fn test_mm512_mask_reduce_mul_ps() {
54462        let a = _mm512_set1_ps(2.);
54463        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
54464        assert_eq!(256., e);
54465    }
54466
54467    #[simd_test(enable = "avx512f")]
54468    unsafe fn test_mm512_reduce_max_epi32() {
54469        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54470        let e: i32 = _mm512_reduce_max_epi32(a);
54471        assert_eq!(15, e);
54472    }
54473
54474    #[simd_test(enable = "avx512f")]
54475    unsafe fn test_mm512_mask_reduce_max_epi32() {
54476        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54477        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
54478        assert_eq!(7, e);
54479    }
54480
54481    #[simd_test(enable = "avx512f")]
54482    unsafe fn test_mm512_reduce_max_epu32() {
54483        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54484        let e: u32 = _mm512_reduce_max_epu32(a);
54485        assert_eq!(15, e);
54486    }
54487
54488    #[simd_test(enable = "avx512f")]
54489    unsafe fn test_mm512_mask_reduce_max_epu32() {
54490        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54491        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
54492        assert_eq!(7, e);
54493    }
54494
54495    #[simd_test(enable = "avx512f")]
54496    unsafe fn test_mm512_reduce_max_ps() {
54497        let a = _mm512_set_ps(
54498            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54499        );
54500        let e: f32 = _mm512_reduce_max_ps(a);
54501        assert_eq!(15., e);
54502    }
54503
54504    #[simd_test(enable = "avx512f")]
54505    unsafe fn test_mm512_mask_reduce_max_ps() {
54506        let a = _mm512_set_ps(
54507            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54508        );
54509        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
54510        assert_eq!(7., e);
54511    }
54512
54513    #[simd_test(enable = "avx512f")]
54514    unsafe fn test_mm512_reduce_min_epi32() {
54515        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54516        let e: i32 = _mm512_reduce_min_epi32(a);
54517        assert_eq!(0, e);
54518    }
54519
54520    #[simd_test(enable = "avx512f")]
54521    unsafe fn test_mm512_mask_reduce_min_epi32() {
54522        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54523        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
54524        assert_eq!(0, e);
54525    }
54526
54527    #[simd_test(enable = "avx512f")]
54528    unsafe fn test_mm512_reduce_min_epu32() {
54529        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54530        let e: u32 = _mm512_reduce_min_epu32(a);
54531        assert_eq!(0, e);
54532    }
54533
54534    #[simd_test(enable = "avx512f")]
54535    unsafe fn test_mm512_mask_reduce_min_epu32() {
54536        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54537        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
54538        assert_eq!(0, e);
54539    }
54540
54541    #[simd_test(enable = "avx512f")]
54542    unsafe fn test_mm512_reduce_min_ps() {
54543        let a = _mm512_set_ps(
54544            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54545        );
54546        let e: f32 = _mm512_reduce_min_ps(a);
54547        assert_eq!(0., e);
54548    }
54549
54550    #[simd_test(enable = "avx512f")]
54551    unsafe fn test_mm512_mask_reduce_min_ps() {
54552        let a = _mm512_set_ps(
54553            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54554        );
54555        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
54556        assert_eq!(0., e);
54557    }
54558
54559    #[simd_test(enable = "avx512f")]
54560    unsafe fn test_mm512_reduce_and_epi32() {
54561        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
54562        let e: i32 = _mm512_reduce_and_epi32(a);
54563        assert_eq!(0, e);
54564    }
54565
54566    #[simd_test(enable = "avx512f")]
54567    unsafe fn test_mm512_mask_reduce_and_epi32() {
54568        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
54569        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
54570        assert_eq!(1, e);
54571    }
54572
54573    #[simd_test(enable = "avx512f")]
54574    unsafe fn test_mm512_reduce_or_epi32() {
54575        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
54576        let e: i32 = _mm512_reduce_or_epi32(a);
54577        assert_eq!(3, e);
54578    }
54579
54580    #[simd_test(enable = "avx512f")]
54581    unsafe fn test_mm512_mask_reduce_or_epi32() {
54582        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
54583        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
54584        assert_eq!(1, e);
54585    }
54586
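    // `compress` packs the active elements (mask bit set) contiguously into the low
    // lanes; the remaining lanes come from `src` (mask form) or are zeroed (maskz
    // form). Since `_mm512_set_epi32` lists element 15 first, mask 0b01010101_01010101
    // selects the even element indices, i.e. the values 15, 13, ..., 1, which end up
    // in the low half of the result.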
54587    #[simd_test(enable = "avx512f")]
54588    unsafe fn test_mm512_mask_compress_epi32() {
54589        let src = _mm512_set1_epi32(200);
54590        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54591        let r = _mm512_mask_compress_epi32(src, 0, a);
54592        assert_eq_m512i(r, src);
54593        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
54594        let e = _mm512_set_epi32(
54595            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
54596        );
54597        assert_eq_m512i(r, e);
54598    }
54599
54600    #[simd_test(enable = "avx512f")]
54601    unsafe fn test_mm512_maskz_compress_epi32() {
54602        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54603        let r = _mm512_maskz_compress_epi32(0, a);
54604        assert_eq_m512i(r, _mm512_setzero_si512());
54605        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
54606        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
54607        assert_eq_m512i(r, e);
54608    }
54609
54610    #[simd_test(enable = "avx512f,avx512vl")]
54611    unsafe fn test_mm256_mask_compress_epi32() {
54612        let src = _mm256_set1_epi32(200);
54613        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54614        let r = _mm256_mask_compress_epi32(src, 0, a);
54615        assert_eq_m256i(r, src);
54616        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
54617        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
54618        assert_eq_m256i(r, e);
54619    }
54620
54621    #[simd_test(enable = "avx512f,avx512vl")]
54622    unsafe fn test_mm256_maskz_compress_epi32() {
54623        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54624        let r = _mm256_maskz_compress_epi32(0, a);
54625        assert_eq_m256i(r, _mm256_setzero_si256());
54626        let r = _mm256_maskz_compress_epi32(0b01010101, a);
54627        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
54628        assert_eq_m256i(r, e);
54629    }
54630
54631    #[simd_test(enable = "avx512f,avx512vl")]
54632    unsafe fn test_mm_mask_compress_epi32() {
54633        let src = _mm_set1_epi32(200);
54634        let a = _mm_set_epi32(0, 1, 2, 3);
54635        let r = _mm_mask_compress_epi32(src, 0, a);
54636        assert_eq_m128i(r, src);
54637        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
54638        let e = _mm_set_epi32(200, 200, 1, 3);
54639        assert_eq_m128i(r, e);
54640    }
54641
54642    #[simd_test(enable = "avx512f,avx512vl")]
54643    unsafe fn test_mm_maskz_compress_epi32() {
54644        let a = _mm_set_epi32(0, 1, 2, 3);
54645        let r = _mm_maskz_compress_epi32(0, a);
54646        assert_eq_m128i(r, _mm_setzero_si128());
54647        let r = _mm_maskz_compress_epi32(0b00000101, a);
54648        let e = _mm_set_epi32(0, 0, 1, 3);
54649        assert_eq_m128i(r, e);
54650    }
54651
54652    #[simd_test(enable = "avx512f")]
54653    unsafe fn test_mm512_mask_compress_ps() {
54654        let src = _mm512_set1_ps(200.);
54655        let a = _mm512_set_ps(
54656            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54657        );
54658        let r = _mm512_mask_compress_ps(src, 0, a);
54659        assert_eq_m512(r, src);
54660        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
54661        let e = _mm512_set_ps(
54662            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
54663        );
54664        assert_eq_m512(r, e);
54665    }
54666
54667    #[simd_test(enable = "avx512f")]
54668    unsafe fn test_mm512_maskz_compress_ps() {
54669        let a = _mm512_set_ps(
54670            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54671        );
54672        let r = _mm512_maskz_compress_ps(0, a);
54673        assert_eq_m512(r, _mm512_setzero_ps());
54674        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
54675        let e = _mm512_set_ps(
54676            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
54677        );
54678        assert_eq_m512(r, e);
54679    }
54680
54681    #[simd_test(enable = "avx512f,avx512vl")]
54682    unsafe fn test_mm256_mask_compress_ps() {
54683        let src = _mm256_set1_ps(200.);
54684        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54685        let r = _mm256_mask_compress_ps(src, 0, a);
54686        assert_eq_m256(r, src);
54687        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
54688        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
54689        assert_eq_m256(r, e);
54690    }
54691
54692    #[simd_test(enable = "avx512f,avx512vl")]
54693    unsafe fn test_mm256_maskz_compress_ps() {
54694        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54695        let r = _mm256_maskz_compress_ps(0, a);
54696        assert_eq_m256(r, _mm256_setzero_ps());
54697        let r = _mm256_maskz_compress_ps(0b01010101, a);
54698        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
54699        assert_eq_m256(r, e);
54700    }
54701
54702    #[simd_test(enable = "avx512f,avx512vl")]
54703    unsafe fn test_mm_mask_compress_ps() {
54704        let src = _mm_set1_ps(200.);
54705        let a = _mm_set_ps(0., 1., 2., 3.);
54706        let r = _mm_mask_compress_ps(src, 0, a);
54707        assert_eq_m128(r, src);
54708        let r = _mm_mask_compress_ps(src, 0b00000101, a);
54709        let e = _mm_set_ps(200., 200., 1., 3.);
54710        assert_eq_m128(r, e);
54711    }
54712
54713    #[simd_test(enable = "avx512f,avx512vl")]
54714    unsafe fn test_mm_maskz_compress_ps() {
54715        let a = _mm_set_ps(0., 1., 2., 3.);
54716        let r = _mm_maskz_compress_ps(0, a);
54717        assert_eq_m128(r, _mm_setzero_ps());
54718        let r = _mm_maskz_compress_ps(0b00000101, a);
54719        let e = _mm_set_ps(0., 0., 1., 3.);
54720        assert_eq_m128(r, e);
54721    }
54722
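    // `compressstoreu` writes only the active elements, contiguously and unaligned,
    // to memory; trailing array entries are left untouched, which is why the rest of
    // `r` stays zero below.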
54723    #[simd_test(enable = "avx512f")]
54724    unsafe fn test_mm512_mask_compressstoreu_epi32() {
54725        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54726        let mut r = [0_i32; 16];
54727        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
54728        assert_eq!(&r, &[0_i32; 16]);
54729        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
54730        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
54731    }
54732
54733    #[simd_test(enable = "avx512f,avx512vl")]
54734    unsafe fn test_mm256_mask_compressstoreu_epi32() {
54735        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54736        let mut r = [0_i32; 8];
54737        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
54738        assert_eq!(&r, &[0_i32; 8]);
54739        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
54740        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
54741    }
54742
54743    #[simd_test(enable = "avx512f,avx512vl")]
54744    unsafe fn test_mm_mask_compressstoreu_epi32() {
54745        let a = _mm_setr_epi32(1, 2, 3, 4);
54746        let mut r = [0_i32; 4];
54747        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
54748        assert_eq!(&r, &[0_i32; 4]);
54749        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
54750        assert_eq!(&r, &[1, 2, 4, 0]);
54751    }
54752
54753    #[simd_test(enable = "avx512f")]
54754    unsafe fn test_mm512_mask_compressstoreu_epi64() {
54755        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
54756        let mut r = [0_i64; 8];
54757        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
54758        assert_eq!(&r, &[0_i64; 8]);
54759        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
54760        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
54761    }
54762
54763    #[simd_test(enable = "avx512f,avx512vl")]
54764    unsafe fn test_mm256_mask_compressstoreu_epi64() {
54765        let a = _mm256_setr_epi64x(1, 2, 3, 4);
54766        let mut r = [0_i64; 4];
54767        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
54768        assert_eq!(&r, &[0_i64; 4]);
54769        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
54770        assert_eq!(&r, &[1, 2, 4, 0]);
54771    }
54772
54773    #[simd_test(enable = "avx512f,avx512vl")]
54774    unsafe fn test_mm_mask_compressstoreu_epi64() {
54775        let a = _mm_setr_epi64x(1, 2);
54776        let mut r = [0_i64; 2];
54777        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
54778        assert_eq!(&r, &[0_i64; 2]);
54779        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
54780        assert_eq!(&r, &[2, 0]);
54781    }
54782
54783    #[simd_test(enable = "avx512f")]
54784    unsafe fn test_mm512_mask_compressstoreu_ps() {
54785        let a = _mm512_setr_ps(
54786            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
54787            13_f32, 14_f32, 15_f32, 16_f32,
54788        );
54789        let mut r = [0_f32; 16];
54790        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
54791        assert_eq!(&r, &[0_f32; 16]);
54792        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
54793        assert_eq!(
54794            &r,
54795            &[
54796                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
54797                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
54798            ]
54799        );
54800    }
54801
54802    #[simd_test(enable = "avx512f,avx512vl")]
54803    unsafe fn test_mm256_mask_compressstoreu_ps() {
54804        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
54805        let mut r = [0_f32; 8];
54806        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
54807        assert_eq!(&r, &[0_f32; 8]);
54808        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
54809        assert_eq!(
54810            &r,
54811            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
54812        );
54813    }
54814
54815    #[simd_test(enable = "avx512f,avx512vl")]
54816    unsafe fn test_mm_mask_compressstoreu_ps() {
54817        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
54818        let mut r = [0.; 4];
54819        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
54820        assert_eq!(&r, &[0.; 4]);
54821        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
54822        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
54823    }
54824
54825    #[simd_test(enable = "avx512f")]
54826    unsafe fn test_mm512_mask_compressstoreu_pd() {
54827        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
54828        let mut r = [0.; 8];
54829        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
54830        assert_eq!(&r, &[0.; 8]);
54831        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
54832        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
54833    }
54834
54835    #[simd_test(enable = "avx512f,avx512vl")]
54836    unsafe fn test_mm256_mask_compressstoreu_pd() {
54837        let a = _mm256_setr_pd(1., 2., 3., 4.);
54838        let mut r = [0.; 4];
54839        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
54840        assert_eq!(&r, &[0.; 4]);
54841        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
54842        assert_eq!(&r, &[1., 2., 4., 0.]);
54843    }
54844
54845    #[simd_test(enable = "avx512f,avx512vl")]
54846    unsafe fn test_mm_mask_compressstoreu_pd() {
54847        let a = _mm_setr_pd(1., 2.);
54848        let mut r = [0.; 2];
54849        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
54850        assert_eq!(&r, &[0.; 2]);
54851        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
54852        assert_eq!(&r, &[2., 0.]);
54853    }
54854
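    // `expand` is the inverse of `compress`: the low elements of `a` are distributed,
    // in order, to the active lanes, while inactive lanes take `src` (or zero for the
    // maskz form).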
54855    #[simd_test(enable = "avx512f")]
54856    unsafe fn test_mm512_mask_expand_epi32() {
54857        let src = _mm512_set1_epi32(200);
54858        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54859        let r = _mm512_mask_expand_epi32(src, 0, a);
54860        assert_eq_m512i(r, src);
54861        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
54862        let e = _mm512_set_epi32(
54863            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
54864        );
54865        assert_eq_m512i(r, e);
54866    }
54867
54868    #[simd_test(enable = "avx512f")]
54869    unsafe fn test_mm512_maskz_expand_epi32() {
54870        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
54871        let r = _mm512_maskz_expand_epi32(0, a);
54872        assert_eq_m512i(r, _mm512_setzero_si512());
54873        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
54874        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
54875        assert_eq_m512i(r, e);
54876    }
54877
54878    #[simd_test(enable = "avx512f,avx512vl")]
54879    unsafe fn test_mm256_mask_expand_epi32() {
54880        let src = _mm256_set1_epi32(200);
54881        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54882        let r = _mm256_mask_expand_epi32(src, 0, a);
54883        assert_eq_m256i(r, src);
54884        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
54885        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
54886        assert_eq_m256i(r, e);
54887    }
54888
54889    #[simd_test(enable = "avx512f,avx512vl")]
54890    unsafe fn test_mm256_maskz_expand_epi32() {
54891        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
54892        let r = _mm256_maskz_expand_epi32(0, a);
54893        assert_eq_m256i(r, _mm256_setzero_si256());
54894        let r = _mm256_maskz_expand_epi32(0b01010101, a);
54895        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
54896        assert_eq_m256i(r, e);
54897    }
54898
54899    #[simd_test(enable = "avx512f,avx512vl")]
54900    unsafe fn test_mm_mask_expand_epi32() {
54901        let src = _mm_set1_epi32(200);
54902        let a = _mm_set_epi32(0, 1, 2, 3);
54903        let r = _mm_mask_expand_epi32(src, 0, a);
54904        assert_eq_m128i(r, src);
54905        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
54906        let e = _mm_set_epi32(200, 2, 200, 3);
54907        assert_eq_m128i(r, e);
54908    }
54909
54910    #[simd_test(enable = "avx512f,avx512vl")]
54911    unsafe fn test_mm_maskz_expand_epi32() {
54912        let a = _mm_set_epi32(0, 1, 2, 3);
54913        let r = _mm_maskz_expand_epi32(0, a);
54914        assert_eq_m128i(r, _mm_setzero_si128());
54915        let r = _mm_maskz_expand_epi32(0b00000101, a);
54916        let e = _mm_set_epi32(0, 2, 0, 3);
54917        assert_eq_m128i(r, e);
54918    }
54919
54920    #[simd_test(enable = "avx512f")]
54921    unsafe fn test_mm512_mask_expand_ps() {
54922        let src = _mm512_set1_ps(200.);
54923        let a = _mm512_set_ps(
54924            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54925        );
54926        let r = _mm512_mask_expand_ps(src, 0, a);
54927        assert_eq_m512(r, src);
54928        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
54929        let e = _mm512_set_ps(
54930            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
54931        );
54932        assert_eq_m512(r, e);
54933    }
54934
54935    #[simd_test(enable = "avx512f")]
54936    unsafe fn test_mm512_maskz_expand_ps() {
54937        let a = _mm512_set_ps(
54938            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
54939        );
54940        let r = _mm512_maskz_expand_ps(0, a);
54941        assert_eq_m512(r, _mm512_setzero_ps());
54942        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
54943        let e = _mm512_set_ps(
54944            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
54945        );
54946        assert_eq_m512(r, e);
54947    }
54948
54949    #[simd_test(enable = "avx512f,avx512vl")]
54950    unsafe fn test_mm256_mask_expand_ps() {
54951        let src = _mm256_set1_ps(200.);
54952        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54953        let r = _mm256_mask_expand_ps(src, 0, a);
54954        assert_eq_m256(r, src);
54955        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
54956        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
54957        assert_eq_m256(r, e);
54958    }
54959
54960    #[simd_test(enable = "avx512f,avx512vl")]
54961    unsafe fn test_mm256_maskz_expand_ps() {
54962        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
54963        let r = _mm256_maskz_expand_ps(0, a);
54964        assert_eq_m256(r, _mm256_setzero_ps());
54965        let r = _mm256_maskz_expand_ps(0b01010101, a);
54966        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
54967        assert_eq_m256(r, e);
54968    }
54969
54970    #[simd_test(enable = "avx512f,avx512vl")]
54971    unsafe fn test_mm_mask_expand_ps() {
54972        let src = _mm_set1_ps(200.);
54973        let a = _mm_set_ps(0., 1., 2., 3.);
54974        let r = _mm_mask_expand_ps(src, 0, a);
54975        assert_eq_m128(r, src);
54976        let r = _mm_mask_expand_ps(src, 0b00000101, a);
54977        let e = _mm_set_ps(200., 2., 200., 3.);
54978        assert_eq_m128(r, e);
54979    }
54980
54981    #[simd_test(enable = "avx512f,avx512vl")]
54982    unsafe fn test_mm_maskz_expand_ps() {
54983        let a = _mm_set_ps(0., 1., 2., 3.);
54984        let r = _mm_maskz_expand_ps(0, a);
54985        assert_eq_m128(r, _mm_setzero_ps());
54986        let r = _mm_maskz_expand_ps(0b00000101, a);
54987        let e = _mm_set_ps(0., 2., 0., 3.);
54988        assert_eq_m128(r, e);
54989    }
54990
54991    #[simd_test(enable = "avx512f")]
54992    unsafe fn test_mm512_loadu_epi32() {
54993        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
54994        let p = a.as_ptr();
54995        let r = _mm512_loadu_epi32(black_box(p));
54996        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
54997        assert_eq_m512i(r, e);
54998    }
54999
55000    #[simd_test(enable = "avx512f,avx512vl")]
55001    unsafe fn test_mm256_loadu_epi32() {
55002        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
55003        let p = a.as_ptr();
55004        let r = _mm256_loadu_epi32(black_box(p));
55005        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
55006        assert_eq_m256i(r, e);
55007    }
55008
55009    #[simd_test(enable = "avx512f,avx512vl")]
55010    unsafe fn test_mm_loadu_epi32() {
55011        let a = &[4, 3, 2, 5];
55012        let p = a.as_ptr();
55013        let r = _mm_loadu_epi32(black_box(p));
55014        let e = _mm_setr_epi32(4, 3, 2, 5);
55015        assert_eq_m128i(r, e);
55016    }
55017
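    // The `cvt*_storeu_epi16`/`cvt*_storeu_epi8` tests below exercise the masked narrowing
    // stores: plain `cvt` truncates each 32-bit lane, `cvtsepi32` saturates to the signed
    // range (i16::MAX / i8::MAX here), and `cvtusepi32` saturates to the unsigned range.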
55018    #[simd_test(enable = "avx512f")]
55019    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
55020        let a = _mm512_set1_epi32(9);
55021        let mut r = _mm256_undefined_si256();
55022        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55023        let e = _mm256_set1_epi16(9);
55024        assert_eq_m256i(r, e);
55025    }
55026
55027    #[simd_test(enable = "avx512f,avx512vl")]
55028    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
55029        let a = _mm256_set1_epi32(9);
55030        let mut r = _mm_undefined_si128();
55031        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55032        let e = _mm_set1_epi16(9);
55033        assert_eq_m128i(r, e);
55034    }
55035
55036    #[simd_test(enable = "avx512f,avx512vl")]
55037    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
55038        let a = _mm_set1_epi32(9);
55039        let mut r = _mm_set1_epi8(0);
55040        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55041        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
55042        assert_eq_m128i(r, e);
55043    }
55044
55045    #[simd_test(enable = "avx512f")]
55046    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
55047        let a = _mm512_set1_epi32(i32::MAX);
55048        let mut r = _mm256_undefined_si256();
55049        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55050        let e = _mm256_set1_epi16(i16::MAX);
55051        assert_eq_m256i(r, e);
55052    }
55053
55054    #[simd_test(enable = "avx512f,avx512vl")]
55055    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
55056        let a = _mm256_set1_epi32(i32::MAX);
55057        let mut r = _mm_undefined_si128();
55058        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55059        let e = _mm_set1_epi16(i16::MAX);
55060        assert_eq_m128i(r, e);
55061    }
55062
55063    #[simd_test(enable = "avx512f,avx512vl")]
55064    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
55065        let a = _mm_set1_epi32(i32::MAX);
55066        let mut r = _mm_set1_epi8(0);
55067        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55068        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
55069        assert_eq_m128i(r, e);
55070    }
55071
55072    #[simd_test(enable = "avx512f")]
55073    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
55074        let a = _mm512_set1_epi32(i32::MAX);
55075        let mut r = _mm256_undefined_si256();
55076        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55077        let e = _mm256_set1_epi16(u16::MAX as i16);
55078        assert_eq_m256i(r, e);
55079    }
55080
55081    #[simd_test(enable = "avx512f,avx512vl")]
55082    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
55083        let a = _mm256_set1_epi32(i32::MAX);
55084        let mut r = _mm_undefined_si128();
55085        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55086        let e = _mm_set1_epi16(u16::MAX as i16);
55087        assert_eq_m128i(r, e);
55088    }
55089
55090    #[simd_test(enable = "avx512f,avx512vl")]
55091    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
55092        let a = _mm_set1_epi32(i32::MAX);
55093        let mut r = _mm_set1_epi8(0);
55094        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
55095        let e = _mm_set_epi16(
55096            0,
55097            0,
55098            0,
55099            0,
55100            u16::MAX as i16,
55101            u16::MAX as i16,
55102            u16::MAX as i16,
55103            u16::MAX as i16,
55104        );
55105        assert_eq_m128i(r, e);
55106    }
55107
55108    #[simd_test(enable = "avx512f")]
55109    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
55110        let a = _mm512_set1_epi32(9);
55111        let mut r = _mm_undefined_si128();
55112        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55113        let e = _mm_set1_epi8(9);
55114        assert_eq_m128i(r, e);
55115    }
55116
55117    #[simd_test(enable = "avx512f,avx512vl")]
55118    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
55119        let a = _mm256_set1_epi32(9);
55120        let mut r = _mm_set1_epi8(0);
55121        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55122        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
55123        assert_eq_m128i(r, e);
55124    }
55125
55126    #[simd_test(enable = "avx512f,avx512vl")]
55127    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
55128        let a = _mm_set1_epi32(9);
55129        let mut r = _mm_set1_epi8(0);
55130        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55131        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
55132        assert_eq_m128i(r, e);
55133    }
55134
55135    #[simd_test(enable = "avx512f")]
55136    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
55137        let a = _mm512_set1_epi32(i32::MAX);
55138        let mut r = _mm_undefined_si128();
55139        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55140        let e = _mm_set1_epi8(i8::MAX);
55141        assert_eq_m128i(r, e);
55142    }
55143
55144    #[simd_test(enable = "avx512f,avx512vl")]
55145    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
55146        let a = _mm256_set1_epi32(i32::MAX);
55147        let mut r = _mm_set1_epi8(0);
55148        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55149        #[rustfmt::skip]
55150        let e = _mm_set_epi8(
55151            0, 0, 0, 0,
55152            0, 0, 0, 0,
55153            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
55154            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
55155        );
55156        assert_eq_m128i(r, e);
55157    }
55158
55159    #[simd_test(enable = "avx512f,avx512vl")]
55160    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
55161        let a = _mm_set1_epi32(i32::MAX);
55162        let mut r = _mm_set1_epi8(0);
55163        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55164        #[rustfmt::skip]
55165        let e = _mm_set_epi8(
55166            0, 0, 0, 0,
55167            0, 0, 0, 0,
55168            0, 0, 0, 0,
55169            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
55170        );
55171        assert_eq_m128i(r, e);
55172    }
55173
55174    #[simd_test(enable = "avx512f")]
55175    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
55176        let a = _mm512_set1_epi32(i32::MAX);
55177        let mut r = _mm_undefined_si128();
55178        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
55179        let e = _mm_set1_epi8(u8::MAX as i8);
55180        assert_eq_m128i(r, e);
55181    }
55182
55183    #[simd_test(enable = "avx512f,avx512vl")]
55184    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
55185        let a = _mm256_set1_epi32(i32::MAX);
55186        let mut r = _mm_set1_epi8(0);
55187        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55188        #[rustfmt::skip]
55189        let e = _mm_set_epi8(
55190            0, 0, 0, 0,
55191            0, 0, 0, 0,
55192            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
55193            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
55194        );
55195        assert_eq_m128i(r, e);
55196    }
55197
55198    #[simd_test(enable = "avx512f,avx512vl")]
55199    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
55200        let a = _mm_set1_epi32(i32::MAX);
55201        let mut r = _mm_set1_epi8(0);
55202        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
55203        #[rustfmt::skip]
55204        let e = _mm_set_epi8(
55205            0, 0, 0, 0,
55206            0, 0, 0, 0,
55207            0, 0, 0, 0,
55208            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
55209        );
55210        assert_eq_m128i(r, e);
55211    }
55212
55213    #[simd_test(enable = "avx512f")]
55214    unsafe fn test_mm512_storeu_epi32() {
55215        let a = _mm512_set1_epi32(9);
55216        let mut r = _mm512_undefined_epi32();
55217        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
55218        assert_eq_m512i(r, a);
55219    }
55220
55221    #[simd_test(enable = "avx512f,avx512vl")]
55222    unsafe fn test_mm256_storeu_epi32() {
55223        let a = _mm256_set1_epi32(9);
55224        let mut r = _mm256_undefined_si256();
55225        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
55226        assert_eq_m256i(r, a);
55227    }
55228
55229    #[simd_test(enable = "avx512f,avx512vl")]
55230    unsafe fn test_mm_storeu_epi32() {
55231        let a = _mm_set1_epi32(9);
55232        let mut r = _mm_undefined_si128();
55233        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
55234        assert_eq_m128i(r, a);
55235    }
55236
55237    #[simd_test(enable = "avx512f")]
55238    unsafe fn test_mm512_loadu_si512() {
55239        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
55240        let p = a.as_ptr();
55241        let r = _mm512_loadu_si512(black_box(p));
55242        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
55243        assert_eq_m512i(r, e);
55244    }
55245
55246    #[simd_test(enable = "avx512f")]
55247    unsafe fn test_mm512_storeu_si512() {
55248        let a = _mm512_set1_epi32(9);
55249        let mut r = _mm512_undefined_epi32();
55250        _mm512_storeu_si512(&mut r as *mut _, a);
55251        assert_eq_m512i(r, a);
55252    }
55253
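    // The aligned `load`/`store` tests wrap the input data in a #[repr(align(64))] struct so
    // the 64-byte alignment required by the non-`u` (aligned) 512-bit variants is guaranteed.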
55254    #[simd_test(enable = "avx512f")]
55255    unsafe fn test_mm512_load_si512() {
55256        #[repr(align(64))]
55257        struct Align {
55258            data: [i32; 16], // 64 bytes
55259        }
55260        let a = Align {
55261            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
55262        };
55263        let p = (a.data).as_ptr();
55264        let r = _mm512_load_si512(black_box(p));
55265        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
55266        assert_eq_m512i(r, e);
55267    }
55268
55269    #[simd_test(enable = "avx512f")]
55270    unsafe fn test_mm512_store_si512() {
55271        let a = _mm512_set1_epi32(9);
55272        let mut r = _mm512_undefined_epi32();
55273        _mm512_store_si512(&mut r as *mut _, a);
55274        assert_eq_m512i(r, a);
55275    }
55276
55277    #[simd_test(enable = "avx512f")]
55278    unsafe fn test_mm512_load_epi32() {
55279        #[repr(align(64))]
55280        struct Align {
55281            data: [i32; 16], // 64 bytes
55282        }
55283        let a = Align {
55284            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
55285        };
55286        let p = (a.data).as_ptr();
55287        let r = _mm512_load_epi32(black_box(p));
55288        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
55289        assert_eq_m512i(r, e);
55290    }
55291
55292    #[simd_test(enable = "avx512f,avx512vl")]
55293    unsafe fn test_mm256_load_epi32() {
55294        #[repr(align(64))]
55295        struct Align {
55296            data: [i32; 8],
55297        }
55298        let a = Align {
55299            data: [4, 3, 2, 5, 8, 9, 64, 50],
55300        };
55301        let p = (a.data).as_ptr();
55302        let r = _mm256_load_epi32(black_box(p));
55303        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
55304        assert_eq_m256i(r, e);
55305    }
55306
55307    #[simd_test(enable = "avx512f,avx512vl")]
55308    unsafe fn test_mm_load_epi32() {
55309        #[repr(align(64))]
55310        struct Align {
55311            data: [i32; 4],
55312        }
55313        let a = Align { data: [4, 3, 2, 5] };
55314        let p = (a.data).as_ptr();
55315        let r = _mm_load_epi32(black_box(p));
55316        let e = _mm_setr_epi32(4, 3, 2, 5);
55317        assert_eq_m128i(r, e);
55318    }
55319
55320    #[simd_test(enable = "avx512f")]
55321    unsafe fn test_mm512_store_epi32() {
55322        let a = _mm512_set1_epi32(9);
55323        let mut r = _mm512_undefined_epi32();
55324        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
55325        assert_eq_m512i(r, a);
55326    }
55327
55328    #[simd_test(enable = "avx512f,avx512vl")]
55329    unsafe fn test_mm256_store_epi32() {
55330        let a = _mm256_set1_epi32(9);
55331        let mut r = _mm256_undefined_si256();
55332        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
55333        assert_eq_m256i(r, a);
55334    }
55335
55336    #[simd_test(enable = "avx512f,avx512vl")]
55337    unsafe fn test_mm_store_epi32() {
55338        let a = _mm_set1_epi32(9);
55339        let mut r = _mm_undefined_si128();
55340        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
55341        assert_eq_m128i(r, a);
55342    }
55343
55344    #[simd_test(enable = "avx512f")]
55345    unsafe fn test_mm512_load_ps() {
55346        #[repr(align(64))]
55347        struct Align {
55348            data: [f32; 16], // 64 bytes
55349        }
55350        let a = Align {
55351            data: [
55352                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
55353            ],
55354        };
55355        let p = (a.data).as_ptr();
55356        let r = _mm512_load_ps(black_box(p));
55357        let e = _mm512_setr_ps(
55358            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
55359        );
55360        assert_eq_m512(r, e);
55361    }
55362
55363    #[simd_test(enable = "avx512f")]
55364    unsafe fn test_mm512_store_ps() {
55365        let a = _mm512_set1_ps(9.);
55366        let mut r = _mm512_undefined_ps();
55367        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
55368        assert_eq_m512(r, a);
55369    }
55370
55371    #[simd_test(enable = "avx512f")]
55372    unsafe fn test_mm512_mask_set1_epi32() {
55373        let src = _mm512_set1_epi32(2);
55374        let a: i32 = 11;
55375        let r = _mm512_mask_set1_epi32(src, 0, a);
55376        assert_eq_m512i(r, src);
55377        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
55378        let e = _mm512_set1_epi32(11);
55379        assert_eq_m512i(r, e);
55380    }
55381
55382    #[simd_test(enable = "avx512f")]
55383    unsafe fn test_mm512_maskz_set1_epi32() {
55384        let a: i32 = 11;
55385        let r = _mm512_maskz_set1_epi32(0, a);
55386        assert_eq_m512i(r, _mm512_setzero_si512());
55387        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
55388        let e = _mm512_set1_epi32(11);
55389        assert_eq_m512i(r, e);
55390    }
55391
55392    #[simd_test(enable = "avx512f,avx512vl")]
55393    unsafe fn test_mm256_mask_set1_epi32() {
55394        let src = _mm256_set1_epi32(2);
55395        let a: i32 = 11;
55396        let r = _mm256_mask_set1_epi32(src, 0, a);
55397        assert_eq_m256i(r, src);
55398        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
55399        let e = _mm256_set1_epi32(11);
55400        assert_eq_m256i(r, e);
55401    }
55402
55403    #[simd_test(enable = "avx512f,avx512vl")]
55404    unsafe fn test_mm256_maskz_set1_epi32() {
55405        let a: i32 = 11;
55406        let r = _mm256_maskz_set1_epi32(0, a);
55407        assert_eq_m256i(r, _mm256_setzero_si256());
55408        let r = _mm256_maskz_set1_epi32(0b11111111, a);
55409        let e = _mm256_set1_epi32(11);
55410        assert_eq_m256i(r, e);
55411    }
55412
55413    #[simd_test(enable = "avx512f,avx512vl")]
55414    unsafe fn test_mm_mask_set1_epi32() {
55415        let src = _mm_set1_epi32(2);
55416        let a: i32 = 11;
55417        let r = _mm_mask_set1_epi32(src, 0, a);
55418        assert_eq_m128i(r, src);
55419        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
55420        let e = _mm_set1_epi32(11);
55421        assert_eq_m128i(r, e);
55422    }
55423
55424    #[simd_test(enable = "avx512f,avx512vl")]
55425    unsafe fn test_mm_maskz_set1_epi32() {
55426        let a: i32 = 11;
55427        let r = _mm_maskz_set1_epi32(0, a);
55428        assert_eq_m128i(r, _mm_setzero_si128());
55429        let r = _mm_maskz_set1_epi32(0b00001111, a);
55430        let e = _mm_set1_epi32(11);
55431        assert_eq_m128i(r, e);
55432    }
55433
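    // `mask_move_ss`/`mask_move_sd` conditionally move only the lowest element of `b`; the
    // upper elements always come from `a`, which is why they differ from `src` in both cases.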
55434    #[simd_test(enable = "avx512f")]
55435    unsafe fn test_mm_mask_move_ss() {
55436        let src = _mm_set_ps(10., 11., 100., 110.);
55437        let a = _mm_set_ps(1., 2., 10., 20.);
55438        let b = _mm_set_ps(3., 4., 30., 40.);
55439        let r = _mm_mask_move_ss(src, 0, a, b);
55440        let e = _mm_set_ps(1., 2., 10., 110.);
55441        assert_eq_m128(r, e);
55442        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
55443        let e = _mm_set_ps(1., 2., 10., 40.);
55444        assert_eq_m128(r, e);
55445    }
55446
55447    #[simd_test(enable = "avx512f")]
55448    unsafe fn test_mm_maskz_move_ss() {
55449        let a = _mm_set_ps(1., 2., 10., 20.);
55450        let b = _mm_set_ps(3., 4., 30., 40.);
55451        let r = _mm_maskz_move_ss(0, a, b);
55452        let e = _mm_set_ps(1., 2., 10., 0.);
55453        assert_eq_m128(r, e);
55454        let r = _mm_maskz_move_ss(0b11111111, a, b);
55455        let e = _mm_set_ps(1., 2., 10., 40.);
55456        assert_eq_m128(r, e);
55457    }
55458
55459    #[simd_test(enable = "avx512f")]
55460    unsafe fn test_mm_mask_move_sd() {
55461        let src = _mm_set_pd(10., 11.);
55462        let a = _mm_set_pd(1., 2.);
55463        let b = _mm_set_pd(3., 4.);
55464        let r = _mm_mask_move_sd(src, 0, a, b);
55465        let e = _mm_set_pd(1., 11.);
55466        assert_eq_m128d(r, e);
55467        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
55468        let e = _mm_set_pd(1., 4.);
55469        assert_eq_m128d(r, e);
55470    }
55471
55472    #[simd_test(enable = "avx512f")]
55473    unsafe fn test_mm_maskz_move_sd() {
55474        let a = _mm_set_pd(1., 2.);
55475        let b = _mm_set_pd(3., 4.);
55476        let r = _mm_maskz_move_sd(0, a, b);
55477        let e = _mm_set_pd(1., 0.);
55478        assert_eq_m128d(r, e);
55479        let r = _mm_maskz_move_sd(0b11111111, a, b);
55480        let e = _mm_set_pd(1., 4.);
55481        assert_eq_m128d(r, e);
55482    }
55483
55484    #[simd_test(enable = "avx512f")]
55485    unsafe fn test_mm_mask_add_ss() {
55486        let src = _mm_set_ps(10., 11., 100., 110.);
55487        let a = _mm_set_ps(1., 2., 10., 20.);
55488        let b = _mm_set_ps(3., 4., 30., 40.);
55489        let r = _mm_mask_add_ss(src, 0, a, b);
55490        let e = _mm_set_ps(1., 2., 10., 110.);
55491        assert_eq_m128(r, e);
55492        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
55493        let e = _mm_set_ps(1., 2., 10., 60.);
55494        assert_eq_m128(r, e);
55495    }
55496
55497    #[simd_test(enable = "avx512f")]
55498    unsafe fn test_mm_maskz_add_ss() {
55499        let a = _mm_set_ps(1., 2., 10., 20.);
55500        let b = _mm_set_ps(3., 4., 30., 40.);
55501        let r = _mm_maskz_add_ss(0, a, b);
55502        let e = _mm_set_ps(1., 2., 10., 0.);
55503        assert_eq_m128(r, e);
55504        let r = _mm_maskz_add_ss(0b11111111, a, b);
55505        let e = _mm_set_ps(1., 2., 10., 60.);
55506        assert_eq_m128(r, e);
55507    }
55508
55509    #[simd_test(enable = "avx512f")]
55510    unsafe fn test_mm_mask_add_sd() {
55511        let src = _mm_set_pd(10., 11.);
55512        let a = _mm_set_pd(1., 2.);
55513        let b = _mm_set_pd(3., 4.);
55514        let r = _mm_mask_add_sd(src, 0, a, b);
55515        let e = _mm_set_pd(1., 11.);
55516        assert_eq_m128d(r, e);
55517        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
55518        let e = _mm_set_pd(1., 6.);
55519        assert_eq_m128d(r, e);
55520    }
55521
55522    #[simd_test(enable = "avx512f")]
55523    unsafe fn test_mm_maskz_add_sd() {
55524        let a = _mm_set_pd(1., 2.);
55525        let b = _mm_set_pd(3., 4.);
55526        let r = _mm_maskz_add_sd(0, a, b);
55527        let e = _mm_set_pd(1., 0.);
55528        assert_eq_m128d(r, e);
55529        let r = _mm_maskz_add_sd(0b11111111, a, b);
55530        let e = _mm_set_pd(1., 6.);
55531        assert_eq_m128d(r, e);
55532    }
55533
55534    #[simd_test(enable = "avx512f")]
55535    unsafe fn test_mm_mask_sub_ss() {
55536        let src = _mm_set_ps(10., 11., 100., 110.);
55537        let a = _mm_set_ps(1., 2., 10., 20.);
55538        let b = _mm_set_ps(3., 4., 30., 40.);
55539        let r = _mm_mask_sub_ss(src, 0, a, b);
55540        let e = _mm_set_ps(1., 2., 10., 110.);
55541        assert_eq_m128(r, e);
55542        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
55543        let e = _mm_set_ps(1., 2., 10., -20.);
55544        assert_eq_m128(r, e);
55545    }
55546
55547    #[simd_test(enable = "avx512f")]
55548    unsafe fn test_mm_maskz_sub_ss() {
55549        let a = _mm_set_ps(1., 2., 10., 20.);
55550        let b = _mm_set_ps(3., 4., 30., 40.);
55551        let r = _mm_maskz_sub_ss(0, a, b);
55552        let e = _mm_set_ps(1., 2., 10., 0.);
55553        assert_eq_m128(r, e);
55554        let r = _mm_maskz_sub_ss(0b11111111, a, b);
55555        let e = _mm_set_ps(1., 2., 10., -20.);
55556        assert_eq_m128(r, e);
55557    }
55558
55559    #[simd_test(enable = "avx512f")]
55560    unsafe fn test_mm_mask_sub_sd() {
55561        let src = _mm_set_pd(10., 11.);
55562        let a = _mm_set_pd(1., 2.);
55563        let b = _mm_set_pd(3., 4.);
55564        let r = _mm_mask_sub_sd(src, 0, a, b);
55565        let e = _mm_set_pd(1., 11.);
55566        assert_eq_m128d(r, e);
55567        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
55568        let e = _mm_set_pd(1., -2.);
55569        assert_eq_m128d(r, e);
55570    }
55571
55572    #[simd_test(enable = "avx512f")]
55573    unsafe fn test_mm_maskz_sub_sd() {
55574        let a = _mm_set_pd(1., 2.);
55575        let b = _mm_set_pd(3., 4.);
55576        let r = _mm_maskz_sub_sd(0, a, b);
55577        let e = _mm_set_pd(1., 0.);
55578        assert_eq_m128d(r, e);
55579        let r = _mm_maskz_sub_sd(0b11111111, a, b);
55580        let e = _mm_set_pd(1., -2.);
55581        assert_eq_m128d(r, e);
55582    }
55583
55584    #[simd_test(enable = "avx512f")]
55585    unsafe fn test_mm_mask_mul_ss() {
55586        let src = _mm_set_ps(10., 11., 100., 110.);
55587        let a = _mm_set_ps(1., 2., 10., 20.);
55588        let b = _mm_set_ps(3., 4., 30., 40.);
55589        let r = _mm_mask_mul_ss(src, 0, a, b);
55590        let e = _mm_set_ps(1., 2., 10., 110.);
55591        assert_eq_m128(r, e);
55592        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
55593        let e = _mm_set_ps(1., 2., 10., 800.);
55594        assert_eq_m128(r, e);
55595    }
55596
55597    #[simd_test(enable = "avx512f")]
55598    unsafe fn test_mm_maskz_mul_ss() {
55599        let a = _mm_set_ps(1., 2., 10., 20.);
55600        let b = _mm_set_ps(3., 4., 30., 40.);
55601        let r = _mm_maskz_mul_ss(0, a, b);
55602        let e = _mm_set_ps(1., 2., 10., 0.);
55603        assert_eq_m128(r, e);
55604        let r = _mm_maskz_mul_ss(0b11111111, a, b);
55605        let e = _mm_set_ps(1., 2., 10., 800.);
55606        assert_eq_m128(r, e);
55607    }
55608
55609    #[simd_test(enable = "avx512f")]
55610    unsafe fn test_mm_mask_mul_sd() {
55611        let src = _mm_set_pd(10., 11.);
55612        let a = _mm_set_pd(1., 2.);
55613        let b = _mm_set_pd(3., 4.);
55614        let r = _mm_mask_mul_sd(src, 0, a, b);
55615        let e = _mm_set_pd(1., 11.);
55616        assert_eq_m128d(r, e);
55617        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
55618        let e = _mm_set_pd(1., 8.);
55619        assert_eq_m128d(r, e);
55620    }
55621
55622    #[simd_test(enable = "avx512f")]
55623    unsafe fn test_mm_maskz_mul_sd() {
55624        let a = _mm_set_pd(1., 2.);
55625        let b = _mm_set_pd(3., 4.);
55626        let r = _mm_maskz_mul_sd(0, a, b);
55627        let e = _mm_set_pd(1., 0.);
55628        assert_eq_m128d(r, e);
55629        let r = _mm_maskz_mul_sd(0b11111111, a, b);
55630        let e = _mm_set_pd(1., 8.);
55631        assert_eq_m128d(r, e);
55632    }
55633
55634    #[simd_test(enable = "avx512f")]
55635    unsafe fn test_mm_mask_div_ss() {
55636        let src = _mm_set_ps(10., 11., 100., 110.);
55637        let a = _mm_set_ps(1., 2., 10., 20.);
55638        let b = _mm_set_ps(3., 4., 30., 40.);
55639        let r = _mm_mask_div_ss(src, 0, a, b);
55640        let e = _mm_set_ps(1., 2., 10., 110.);
55641        assert_eq_m128(r, e);
55642        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
55643        let e = _mm_set_ps(1., 2., 10., 0.5);
55644        assert_eq_m128(r, e);
55645    }
55646
55647    #[simd_test(enable = "avx512f")]
55648    unsafe fn test_mm_maskz_div_ss() {
55649        let a = _mm_set_ps(1., 2., 10., 20.);
55650        let b = _mm_set_ps(3., 4., 30., 40.);
55651        let r = _mm_maskz_div_ss(0, a, b);
55652        let e = _mm_set_ps(1., 2., 10., 0.);
55653        assert_eq_m128(r, e);
55654        let r = _mm_maskz_div_ss(0b11111111, a, b);
55655        let e = _mm_set_ps(1., 2., 10., 0.5);
55656        assert_eq_m128(r, e);
55657    }
55658
55659    #[simd_test(enable = "avx512f")]
55660    unsafe fn test_mm_mask_div_sd() {
55661        let src = _mm_set_pd(10., 11.);
55662        let a = _mm_set_pd(1., 2.);
55663        let b = _mm_set_pd(3., 4.);
55664        let r = _mm_mask_div_sd(src, 0, a, b);
55665        let e = _mm_set_pd(1., 11.);
55666        assert_eq_m128d(r, e);
55667        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
55668        let e = _mm_set_pd(1., 0.5);
55669        assert_eq_m128d(r, e);
55670    }
55671
55672    #[simd_test(enable = "avx512f")]
55673    unsafe fn test_mm_maskz_div_sd() {
55674        let a = _mm_set_pd(1., 2.);
55675        let b = _mm_set_pd(3., 4.);
55676        let r = _mm_maskz_div_sd(0, a, b);
55677        let e = _mm_set_pd(1., 0.);
55678        assert_eq_m128d(r, e);
55679        let r = _mm_maskz_div_sd(0b11111111, a, b);
55680        let e = _mm_set_pd(1., 0.5);
55681        assert_eq_m128d(r, e);
55682    }
55683
55684    #[simd_test(enable = "avx512f")]
55685    unsafe fn test_mm_mask_max_ss() {
55686        let a = _mm_set_ps(0., 1., 2., 3.);
55687        let b = _mm_set_ps(4., 5., 6., 7.);
55688        let r = _mm_mask_max_ss(a, 0, a, b);
55689        let e = _mm_set_ps(0., 1., 2., 3.);
55690        assert_eq_m128(r, e);
55691        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
55692        let e = _mm_set_ps(0., 1., 2., 7.);
55693        assert_eq_m128(r, e);
55694    }
55695
55696    #[simd_test(enable = "avx512f")]
55697    unsafe fn test_mm_maskz_max_ss() {
55698        let a = _mm_set_ps(0., 1., 2., 3.);
55699        let b = _mm_set_ps(4., 5., 6., 7.);
55700        let r = _mm_maskz_max_ss(0, a, b);
55701        let e = _mm_set_ps(0., 1., 2., 0.);
55702        assert_eq_m128(r, e);
55703        let r = _mm_maskz_max_ss(0b11111111, a, b);
55704        let e = _mm_set_ps(0., 1., 2., 7.);
55705        assert_eq_m128(r, e);
55706    }
55707
55708    #[simd_test(enable = "avx512f")]
55709    unsafe fn test_mm_mask_max_sd() {
55710        let a = _mm_set_pd(0., 1.);
55711        let b = _mm_set_pd(2., 3.);
55712        let r = _mm_mask_max_sd(a, 0, a, b);
55713        let e = _mm_set_pd(0., 1.);
55714        assert_eq_m128d(r, e);
55715        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
55716        let e = _mm_set_pd(0., 3.);
55717        assert_eq_m128d(r, e);
55718    }
55719
55720    #[simd_test(enable = "avx512f")]
55721    unsafe fn test_mm_maskz_max_sd() {
55722        let a = _mm_set_pd(0., 1.);
55723        let b = _mm_set_pd(2., 3.);
55724        let r = _mm_maskz_max_sd(0, a, b);
55725        let e = _mm_set_pd(0., 0.);
55726        assert_eq_m128d(r, e);
55727        let r = _mm_maskz_max_sd(0b11111111, a, b);
55728        let e = _mm_set_pd(0., 3.);
55729        assert_eq_m128d(r, e);
55730    }
55731
55732    #[simd_test(enable = "avx512f")]
55733    unsafe fn test_mm_mask_min_ss() {
55734        let a = _mm_set_ps(0., 1., 2., 3.);
55735        let b = _mm_set_ps(4., 5., 6., 7.);
55736        let r = _mm_mask_min_ss(a, 0, a, b);
55737        let e = _mm_set_ps(0., 1., 2., 3.);
55738        assert_eq_m128(r, e);
55739        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
55740        let e = _mm_set_ps(0., 1., 2., 3.);
55741        assert_eq_m128(r, e);
55742    }
55743
55744    #[simd_test(enable = "avx512f")]
55745    unsafe fn test_mm_maskz_min_ss() {
55746        let a = _mm_set_ps(0., 1., 2., 3.);
55747        let b = _mm_set_ps(4., 5., 6., 7.);
55748        let r = _mm_maskz_min_ss(0, a, b);
55749        let e = _mm_set_ps(0., 1., 2., 0.);
55750        assert_eq_m128(r, e);
55751        let r = _mm_maskz_min_ss(0b11111111, a, b);
55752        let e = _mm_set_ps(0., 1., 2., 3.);
55753        assert_eq_m128(r, e);
55754    }
55755
55756    #[simd_test(enable = "avx512f")]
55757    unsafe fn test_mm_mask_min_sd() {
55758        let a = _mm_set_pd(0., 1.);
55759        let b = _mm_set_pd(2., 3.);
55760        let r = _mm_mask_min_sd(a, 0, a, b);
55761        let e = _mm_set_pd(0., 1.);
55762        assert_eq_m128d(r, e);
55763        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
55764        let e = _mm_set_pd(0., 1.);
55765        assert_eq_m128d(r, e);
55766    }
55767
55768    #[simd_test(enable = "avx512f")]
55769    unsafe fn test_mm_maskz_min_sd() {
55770        let a = _mm_set_pd(0., 1.);
55771        let b = _mm_set_pd(2., 3.);
55772        let r = _mm_maskz_min_sd(0, a, b);
55773        let e = _mm_set_pd(0., 0.);
55774        assert_eq_m128d(r, e);
55775        let r = _mm_maskz_min_sd(0b11111111, a, b);
55776        let e = _mm_set_pd(0., 1.);
55777        assert_eq_m128d(r, e);
55778    }
55779
55780    #[simd_test(enable = "avx512f")]
55781    unsafe fn test_mm_mask_sqrt_ss() {
55782        let src = _mm_set_ps(10., 11., 100., 110.);
55783        let a = _mm_set_ps(1., 2., 10., 20.);
55784        let b = _mm_set_ps(3., 4., 30., 4.);
55785        let r = _mm_mask_sqrt_ss(src, 0, a, b);
55786        let e = _mm_set_ps(1., 2., 10., 110.);
55787        assert_eq_m128(r, e);
55788        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
55789        let e = _mm_set_ps(1., 2., 10., 2.);
55790        assert_eq_m128(r, e);
55791    }
55792
55793    #[simd_test(enable = "avx512f")]
55794    unsafe fn test_mm_maskz_sqrt_ss() {
55795        let a = _mm_set_ps(1., 2., 10., 20.);
55796        let b = _mm_set_ps(3., 4., 30., 4.);
55797        let r = _mm_maskz_sqrt_ss(0, a, b);
55798        let e = _mm_set_ps(1., 2., 10., 0.);
55799        assert_eq_m128(r, e);
55800        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
55801        let e = _mm_set_ps(1., 2., 10., 2.);
55802        assert_eq_m128(r, e);
55803    }
55804
55805    #[simd_test(enable = "avx512f")]
55806    unsafe fn test_mm_mask_sqrt_sd() {
55807        let src = _mm_set_pd(10., 11.);
55808        let a = _mm_set_pd(1., 2.);
55809        let b = _mm_set_pd(3., 4.);
55810        let r = _mm_mask_sqrt_sd(src, 0, a, b);
55811        let e = _mm_set_pd(1., 11.);
55812        assert_eq_m128d(r, e);
55813        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
55814        let e = _mm_set_pd(1., 2.);
55815        assert_eq_m128d(r, e);
55816    }
55817
55818    #[simd_test(enable = "avx512f")]
55819    unsafe fn test_mm_maskz_sqrt_sd() {
55820        let a = _mm_set_pd(1., 2.);
55821        let b = _mm_set_pd(3., 4.);
55822        let r = _mm_maskz_sqrt_sd(0, a, b);
55823        let e = _mm_set_pd(1., 0.);
55824        assert_eq_m128d(r, e);
55825        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
55826        let e = _mm_set_pd(1., 2.);
55827        assert_eq_m128d(r, e);
55828    }
55829
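    // `rsqrt14` returns an approximation of 1/sqrt(x) with a relative error of at most 2^-14;
    // the operand 4.0 is chosen so the expected result 0.5 is exact.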
55830    #[simd_test(enable = "avx512f")]
55831    unsafe fn test_mm_rsqrt14_ss() {
55832        let a = _mm_set_ps(1., 2., 10., 20.);
55833        let b = _mm_set_ps(3., 4., 30., 4.);
55834        let r = _mm_rsqrt14_ss(a, b);
55835        let e = _mm_set_ps(1., 2., 10., 0.5);
55836        assert_eq_m128(r, e);
55837    }
55838
55839    #[simd_test(enable = "avx512f")]
55840    unsafe fn test_mm_mask_rsqrt14_ss() {
55841        let src = _mm_set_ps(10., 11., 100., 110.);
55842        let a = _mm_set_ps(1., 2., 10., 20.);
55843        let b = _mm_set_ps(3., 4., 30., 4.);
55844        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
55845        let e = _mm_set_ps(1., 2., 10., 110.);
55846        assert_eq_m128(r, e);
55847        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
55848        let e = _mm_set_ps(1., 2., 10., 0.5);
55849        assert_eq_m128(r, e);
55850    }
55851
55852    #[simd_test(enable = "avx512f")]
55853    unsafe fn test_mm_maskz_rsqrt14_ss() {
55854        let a = _mm_set_ps(1., 2., 10., 20.);
55855        let b = _mm_set_ps(3., 4., 30., 4.);
55856        let r = _mm_maskz_rsqrt14_ss(0, a, b);
55857        let e = _mm_set_ps(1., 2., 10., 0.);
55858        assert_eq_m128(r, e);
55859        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
55860        let e = _mm_set_ps(1., 2., 10., 0.5);
55861        assert_eq_m128(r, e);
55862    }
55863
55864    #[simd_test(enable = "avx512f")]
55865    unsafe fn test_mm_rsqrt14_sd() {
55866        let a = _mm_set_pd(1., 2.);
55867        let b = _mm_set_pd(3., 4.);
55868        let r = _mm_rsqrt14_sd(a, b);
55869        let e = _mm_set_pd(1., 0.5);
55870        assert_eq_m128d(r, e);
55871    }
55872
55873    #[simd_test(enable = "avx512f")]
55874    unsafe fn test_mm_mask_rsqrt14_sd() {
55875        let src = _mm_set_pd(10., 11.);
55876        let a = _mm_set_pd(1., 2.);
55877        let b = _mm_set_pd(3., 4.);
55878        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
55879        let e = _mm_set_pd(1., 11.);
55880        assert_eq_m128d(r, e);
55881        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
55882        let e = _mm_set_pd(1., 0.5);
55883        assert_eq_m128d(r, e);
55884    }
55885
55886    #[simd_test(enable = "avx512f")]
55887    unsafe fn test_mm_maskz_rsqrt14_sd() {
55888        let a = _mm_set_pd(1., 2.);
55889        let b = _mm_set_pd(3., 4.);
55890        let r = _mm_maskz_rsqrt14_sd(0, a, b);
55891        let e = _mm_set_pd(1., 0.);
55892        assert_eq_m128d(r, e);
55893        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
55894        let e = _mm_set_pd(1., 0.5);
55895        assert_eq_m128d(r, e);
55896    }
55897
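    // `rcp14` approximates 1/x with a relative error of at most 2^-14; again the operand 4.0
    // makes the expected result 0.25 exact.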
55898    #[simd_test(enable = "avx512f")]
55899    unsafe fn test_mm_rcp14_ss() {
55900        let a = _mm_set_ps(1., 2., 10., 20.);
55901        let b = _mm_set_ps(3., 4., 30., 4.);
55902        let r = _mm_rcp14_ss(a, b);
55903        let e = _mm_set_ps(1., 2., 10., 0.25);
55904        assert_eq_m128(r, e);
55905    }
55906
55907    #[simd_test(enable = "avx512f")]
55908    unsafe fn test_mm_mask_rcp14_ss() {
55909        let src = _mm_set_ps(10., 11., 100., 110.);
55910        let a = _mm_set_ps(1., 2., 10., 20.);
55911        let b = _mm_set_ps(3., 4., 30., 4.);
55912        let r = _mm_mask_rcp14_ss(src, 0, a, b);
55913        let e = _mm_set_ps(1., 2., 10., 110.);
55914        assert_eq_m128(r, e);
55915        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
55916        let e = _mm_set_ps(1., 2., 10., 0.25);
55917        assert_eq_m128(r, e);
55918    }
55919
55920    #[simd_test(enable = "avx512f")]
55921    unsafe fn test_mm_maskz_rcp14_ss() {
55922        let a = _mm_set_ps(1., 2., 10., 20.);
55923        let b = _mm_set_ps(3., 4., 30., 4.);
55924        let r = _mm_maskz_rcp14_ss(0, a, b);
55925        let e = _mm_set_ps(1., 2., 10., 0.);
55926        assert_eq_m128(r, e);
55927        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
55928        let e = _mm_set_ps(1., 2., 10., 0.25);
55929        assert_eq_m128(r, e);
55930    }
55931
55932    #[simd_test(enable = "avx512f")]
55933    unsafe fn test_mm_rcp14_sd() {
55934        let a = _mm_set_pd(1., 2.);
55935        let b = _mm_set_pd(3., 4.);
55936        let r = _mm_rcp14_sd(a, b);
55937        let e = _mm_set_pd(1., 0.25);
55938        assert_eq_m128d(r, e);
55939    }
55940
55941    #[simd_test(enable = "avx512f")]
55942    unsafe fn test_mm_mask_rcp14_sd() {
55943        let src = _mm_set_pd(10., 11.);
55944        let a = _mm_set_pd(1., 2.);
55945        let b = _mm_set_pd(3., 4.);
55946        let r = _mm_mask_rcp14_sd(src, 0, a, b);
55947        let e = _mm_set_pd(1., 11.);
55948        assert_eq_m128d(r, e);
55949        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
55950        let e = _mm_set_pd(1., 0.25);
55951        assert_eq_m128d(r, e);
55952    }
55953
55954    #[simd_test(enable = "avx512f")]
55955    unsafe fn test_mm_maskz_rcp14_sd() {
55956        let a = _mm_set_pd(1., 2.);
55957        let b = _mm_set_pd(3., 4.);
55958        let r = _mm_maskz_rcp14_sd(0, a, b);
55959        let e = _mm_set_pd(1., 0.);
55960        assert_eq_m128d(r, e);
55961        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
55962        let e = _mm_set_pd(1., 0.25);
55963        assert_eq_m128d(r, e);
55964    }
55965
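    // `getexp` extracts floor(log2(|x|)) as a float, so the lowest element of b (3.0) yields
    // 1.0; the upper elements are copied from a.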
55966    #[simd_test(enable = "avx512f")]
55967    unsafe fn test_mm_getexp_ss() {
55968        let a = _mm_set1_ps(2.);
55969        let b = _mm_set1_ps(3.);
55970        let r = _mm_getexp_ss(a, b);
55971        let e = _mm_set_ps(2., 2., 2., 1.);
55972        assert_eq_m128(r, e);
55973    }
55974
55975    #[simd_test(enable = "avx512f")]
55976    unsafe fn test_mm_mask_getexp_ss() {
55977        let a = _mm_set1_ps(2.);
55978        let b = _mm_set1_ps(3.);
55979        let r = _mm_mask_getexp_ss(a, 0, a, b);
55980        let e = _mm_set_ps(2., 2., 2., 2.);
55981        assert_eq_m128(r, e);
55982        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
55983        let e = _mm_set_ps(2., 2., 2., 1.);
55984        assert_eq_m128(r, e);
55985    }
55986
55987    #[simd_test(enable = "avx512f")]
55988    unsafe fn test_mm_maskz_getexp_ss() {
55989        let a = _mm_set1_ps(2.);
55990        let b = _mm_set1_ps(3.);
55991        let r = _mm_maskz_getexp_ss(0, a, b);
55992        let e = _mm_set_ps(2., 2., 2., 0.);
55993        assert_eq_m128(r, e);
55994        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
55995        let e = _mm_set_ps(2., 2., 2., 1.);
55996        assert_eq_m128(r, e);
55997    }
55998
55999    #[simd_test(enable = "avx512f")]
56000    unsafe fn test_mm_getexp_sd() {
56001        let a = _mm_set1_pd(2.);
56002        let b = _mm_set1_pd(3.);
56003        let r = _mm_getexp_sd(a, b);
56004        let e = _mm_set_pd(2., 1.);
56005        assert_eq_m128d(r, e);
56006    }
56007
56008    #[simd_test(enable = "avx512f")]
56009    unsafe fn test_mm_mask_getexp_sd() {
56010        let a = _mm_set1_pd(2.);
56011        let b = _mm_set1_pd(3.);
56012        let r = _mm_mask_getexp_sd(a, 0, a, b);
56013        let e = _mm_set_pd(2., 2.);
56014        assert_eq_m128d(r, e);
56015        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
56016        let e = _mm_set_pd(2., 1.);
56017        assert_eq_m128d(r, e);
56018    }
56019
56020    #[simd_test(enable = "avx512f")]
56021    unsafe fn test_mm_maskz_getexp_sd() {
56022        let a = _mm_set1_pd(2.);
56023        let b = _mm_set1_pd(3.);
56024        let r = _mm_maskz_getexp_sd(0, a, b);
56025        let e = _mm_set_pd(2., 0.);
56026        assert_eq_m128d(r, e);
56027        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
56028        let e = _mm_set_pd(2., 1.);
56029        assert_eq_m128d(r, e);
56030    }
56031
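    // `getmant` with `_MM_MANT_NORM_1_2` returns the mantissa normalized to [1, 2):
    // 10.0 = 1.25 * 2^3, hence the expected 1.25 in the lowest element.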
56032    #[simd_test(enable = "avx512f")]
56033    unsafe fn test_mm_getmant_ss() {
56034        let a = _mm_set1_ps(20.);
56035        let b = _mm_set1_ps(10.);
56036        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
56037        let e = _mm_set_ps(20., 20., 20., 1.25);
56038        assert_eq_m128(r, e);
56039    }
56040
56041    #[simd_test(enable = "avx512f")]
56042    unsafe fn test_mm_mask_getmant_ss() {
56043        let a = _mm_set1_ps(20.);
56044        let b = _mm_set1_ps(10.);
56045        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
56046        let e = _mm_set_ps(20., 20., 20., 20.);
56047        assert_eq_m128(r, e);
56048        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
56049        let e = _mm_set_ps(20., 20., 20., 1.25);
56050        assert_eq_m128(r, e);
56051    }
56052
56053    #[simd_test(enable = "avx512f")]
56054    unsafe fn test_mm_maskz_getmant_ss() {
56055        let a = _mm_set1_ps(20.);
56056        let b = _mm_set1_ps(10.);
56057        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
56058        let e = _mm_set_ps(20., 20., 20., 0.);
56059        assert_eq_m128(r, e);
56060        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
56061        let e = _mm_set_ps(20., 20., 20., 1.25);
56062        assert_eq_m128(r, e);
56063    }
56064
56065    #[simd_test(enable = "avx512f")]
56066    unsafe fn test_mm_getmant_sd() {
56067        let a = _mm_set1_pd(20.);
56068        let b = _mm_set1_pd(10.);
56069        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
56070        let e = _mm_set_pd(20., 1.25);
56071        assert_eq_m128d(r, e);
56072    }
56073
56074    #[simd_test(enable = "avx512f")]
56075    unsafe fn test_mm_mask_getmant_sd() {
56076        let a = _mm_set1_pd(20.);
56077        let b = _mm_set1_pd(10.);
56078        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
56079        let e = _mm_set_pd(20., 20.);
56080        assert_eq_m128d(r, e);
56081        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
56082        let e = _mm_set_pd(20., 1.25);
56083        assert_eq_m128d(r, e);
56084    }
56085
56086    #[simd_test(enable = "avx512f")]
56087    unsafe fn test_mm_maskz_getmant_sd() {
56088        let a = _mm_set1_pd(20.);
56089        let b = _mm_set1_pd(10.);
56090        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
56091        let e = _mm_set_pd(20., 0.);
56092        assert_eq_m128d(r, e);
56093        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
56094        let e = _mm_set_pd(20., 1.25);
56095        assert_eq_m128d(r, e);
56096    }
56097
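    // `roundscale` with IMM8 = 0 rounds to the nearest integer (no scaling), so 1.1 rounds
    // to 1.0 in the lowest element while the upper elements come from a.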
56098    #[simd_test(enable = "avx512f")]
56099    unsafe fn test_mm_roundscale_ss() {
56100        let a = _mm_set1_ps(2.2);
56101        let b = _mm_set1_ps(1.1);
56102        let r = _mm_roundscale_ss::<0>(a, b);
56103        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
56104        assert_eq_m128(r, e);
56105    }
56106
56107    #[simd_test(enable = "avx512f")]
56108    unsafe fn test_mm_mask_roundscale_ss() {
56109        let a = _mm_set1_ps(2.2);
56110        let b = _mm_set1_ps(1.1);
56111        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
56112        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
56113        assert_eq_m128(r, e);
56114        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
56115        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
56116        assert_eq_m128(r, e);
56117    }
56118
56119    #[simd_test(enable = "avx512f")]
56120    unsafe fn test_mm_maskz_roundscale_ss() {
56121        let a = _mm_set1_ps(2.2);
56122        let b = _mm_set1_ps(1.1);
56123        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
56124        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
56125        assert_eq_m128(r, e);
56126        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
56127        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
56128        assert_eq_m128(r, e);
56129    }
56130
56131    #[simd_test(enable = "avx512f")]
56132    unsafe fn test_mm_roundscale_sd() {
56133        let a = _mm_set1_pd(2.2);
56134        let b = _mm_set1_pd(1.1);
56135        let r = _mm_roundscale_sd::<0>(a, b);
56136        let e = _mm_set_pd(2.2, 1.0);
56137        assert_eq_m128d(r, e);
56138    }
56139
56140    #[simd_test(enable = "avx512f")]
56141    unsafe fn test_mm_mask_roundscale_sd() {
56142        let a = _mm_set1_pd(2.2);
56143        let b = _mm_set1_pd(1.1);
56144        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
56145        let e = _mm_set_pd(2.2, 2.2);
56146        assert_eq_m128d(r, e);
56147        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
56148        let e = _mm_set_pd(2.2, 1.0);
56149        assert_eq_m128d(r, e);
56150    }
56151
56152    #[simd_test(enable = "avx512f")]
56153    unsafe fn test_mm_maskz_roundscale_sd() {
56154        let a = _mm_set1_pd(2.2);
56155        let b = _mm_set1_pd(1.1);
56156        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
56157        let e = _mm_set_pd(2.2, 0.0);
56158        assert_eq_m128d(r, e);
56159        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
56160        let e = _mm_set_pd(2.2, 1.0);
56161        assert_eq_m128d(r, e);
56162    }
56163
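    // `scalef` computes a * 2^floor(b) per element, so 1.0 * 2^3 = 8.0 in the lowest lane.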
56164    #[simd_test(enable = "avx512f")]
56165    unsafe fn test_mm_scalef_ss() {
56166        let a = _mm_set1_ps(1.);
56167        let b = _mm_set1_ps(3.);
56168        let r = _mm_scalef_ss(a, b);
56169        let e = _mm_set_ps(1., 1., 1., 8.);
56170        assert_eq_m128(r, e);
56171    }
56172
56173    #[simd_test(enable = "avx512f")]
56174    unsafe fn test_mm_mask_scalef_ss() {
56175        let a = _mm_set1_ps(1.);
56176        let b = _mm_set1_ps(3.);
56177        let r = _mm_mask_scalef_ss(a, 0, a, b);
56178        let e = _mm_set_ps(1., 1., 1., 1.);
56179        assert_eq_m128(r, e);
56180        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
56181        let e = _mm_set_ps(1., 1., 1., 8.);
56182        assert_eq_m128(r, e);
56183    }
56184
56185    #[simd_test(enable = "avx512f")]
56186    unsafe fn test_mm_maskz_scalef_ss() {
56187        let a = _mm_set1_ps(1.);
56188        let b = _mm_set1_ps(3.);
56189        let r = _mm_maskz_scalef_ss(0, a, b);
56190        let e = _mm_set_ps(1., 1., 1., 0.);
56191        assert_eq_m128(r, e);
56192        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
56193        let e = _mm_set_ps(1., 1., 1., 8.);
56194        assert_eq_m128(r, e);
56195    }
56196
56197    #[simd_test(enable = "avx512f")]
56198    unsafe fn test_mm_scalef_sd() {
56199        let a = _mm_set1_pd(1.);
56200        let b = _mm_set1_pd(3.);
56201        let r = _mm_scalef_sd(a, b);
56202        let e = _mm_set_pd(1., 8.);
56203        assert_eq_m128d(r, e);
56204    }
56205
56206    #[simd_test(enable = "avx512f")]
56207    unsafe fn test_mm_mask_scalef_sd() {
56208        let a = _mm_set1_pd(1.);
56209        let b = _mm_set1_pd(3.);
56210        let r = _mm_mask_scalef_sd(a, 0, a, b);
56211        let e = _mm_set_pd(1., 1.);
56212        assert_eq_m128d(r, e);
56213        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
56214        let e = _mm_set_pd(1., 8.);
56215        assert_eq_m128d(r, e);
56216    }
56217
56218    #[simd_test(enable = "avx512f")]
56219    unsafe fn test_mm_maskz_scalef_sd() {
56220        let a = _mm_set1_pd(1.);
56221        let b = _mm_set1_pd(3.);
56222        let r = _mm_maskz_scalef_sd(0, a, b);
56223        let e = _mm_set_pd(1., 0.);
56224        assert_eq_m128d(r, e);
56225        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
56226        let e = _mm_set_pd(1., 8.);
56227        assert_eq_m128d(r, e);
56228    }
56229
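    // The scalar FMA tests below cover the three masking forms: `mask` blends with the first
    // operand, `maskz` zeroes the lowest lane when the mask bit is clear, and `mask3` blends
    // with (and takes its upper elements from) the third operand.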
56230    #[simd_test(enable = "avx512f")]
56231    unsafe fn test_mm_mask_fmadd_ss() {
56232        let a = _mm_set1_ps(1.);
56233        let b = _mm_set1_ps(2.);
56234        let c = _mm_set1_ps(3.);
56235        let r = _mm_mask_fmadd_ss(a, 0, b, c);
56236        assert_eq_m128(r, a);
56237        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
56238        let e = _mm_set_ps(1., 1., 1., 5.);
56239        assert_eq_m128(r, e);
56240    }
56241
56242    #[simd_test(enable = "avx512f")]
56243    unsafe fn test_mm_maskz_fmadd_ss() {
56244        let a = _mm_set1_ps(1.);
56245        let b = _mm_set1_ps(2.);
56246        let c = _mm_set1_ps(3.);
56247        let r = _mm_maskz_fmadd_ss(0, a, b, c);
56248        let e = _mm_set_ps(1., 1., 1., 0.);
56249        assert_eq_m128(r, e);
56250        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
56251        let e = _mm_set_ps(1., 1., 1., 5.);
56252        assert_eq_m128(r, e);
56253    }
56254
56255    #[simd_test(enable = "avx512f")]
56256    unsafe fn test_mm_mask3_fmadd_ss() {
56257        let a = _mm_set1_ps(1.);
56258        let b = _mm_set1_ps(2.);
56259        let c = _mm_set1_ps(3.);
56260        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
56261        assert_eq_m128(r, c);
56262        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
56263        let e = _mm_set_ps(3., 3., 3., 5.);
56264        assert_eq_m128(r, e);
56265    }
56266
56267    #[simd_test(enable = "avx512f")]
56268    unsafe fn test_mm_mask_fmadd_sd() {
56269        let a = _mm_set1_pd(1.);
56270        let b = _mm_set1_pd(2.);
56271        let c = _mm_set1_pd(3.);
56272        let r = _mm_mask_fmadd_sd(a, 0, b, c);
56273        assert_eq_m128d(r, a);
56274        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
56275        let e = _mm_set_pd(1., 5.);
56276        assert_eq_m128d(r, e);
56277    }
56278
56279    #[simd_test(enable = "avx512f")]
56280    unsafe fn test_mm_maskz_fmadd_sd() {
56281        let a = _mm_set1_pd(1.);
56282        let b = _mm_set1_pd(2.);
56283        let c = _mm_set1_pd(3.);
56284        let r = _mm_maskz_fmadd_sd(0, a, b, c);
56285        let e = _mm_set_pd(1., 0.);
56286        assert_eq_m128d(r, e);
56287        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
56288        let e = _mm_set_pd(1., 5.);
56289        assert_eq_m128d(r, e);
56290    }
56291
56292    #[simd_test(enable = "avx512f")]
56293    unsafe fn test_mm_mask3_fmadd_sd() {
56294        let a = _mm_set1_pd(1.);
56295        let b = _mm_set1_pd(2.);
56296        let c = _mm_set1_pd(3.);
56297        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
56298        assert_eq_m128d(r, c);
56299        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
56300        let e = _mm_set_pd(3., 5.);
56301        assert_eq_m128d(r, e);
56302    }
56303
56304    #[simd_test(enable = "avx512f")]
56305    unsafe fn test_mm_mask_fmsub_ss() {
56306        let a = _mm_set1_ps(1.);
56307        let b = _mm_set1_ps(2.);
56308        let c = _mm_set1_ps(3.);
56309        let r = _mm_mask_fmsub_ss(a, 0, b, c);
56310        assert_eq_m128(r, a);
56311        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
56312        let e = _mm_set_ps(1., 1., 1., -1.);
56313        assert_eq_m128(r, e);
56314    }
56315
56316    #[simd_test(enable = "avx512f")]
56317    unsafe fn test_mm_maskz_fmsub_ss() {
56318        let a = _mm_set1_ps(1.);
56319        let b = _mm_set1_ps(2.);
56320        let c = _mm_set1_ps(3.);
56321        let r = _mm_maskz_fmsub_ss(0, a, b, c);
56322        let e = _mm_set_ps(1., 1., 1., 0.);
56323        assert_eq_m128(r, e);
56324        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
56325        let e = _mm_set_ps(1., 1., 1., -1.);
56326        assert_eq_m128(r, e);
56327    }
56328
56329    #[simd_test(enable = "avx512f")]
56330    unsafe fn test_mm_mask3_fmsub_ss() {
56331        let a = _mm_set1_ps(1.);
56332        let b = _mm_set1_ps(2.);
56333        let c = _mm_set1_ps(3.);
56334        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
56335        assert_eq_m128(r, c);
56336        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
56337        let e = _mm_set_ps(3., 3., 3., -1.);
56338        assert_eq_m128(r, e);
56339    }
56340
56341    #[simd_test(enable = "avx512f")]
56342    unsafe fn test_mm_mask_fmsub_sd() {
56343        let a = _mm_set1_pd(1.);
56344        let b = _mm_set1_pd(2.);
56345        let c = _mm_set1_pd(3.);
56346        let r = _mm_mask_fmsub_sd(a, 0, b, c);
56347        assert_eq_m128d(r, a);
56348        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
56349        let e = _mm_set_pd(1., -1.);
56350        assert_eq_m128d(r, e);
56351    }
56352
56353    #[simd_test(enable = "avx512f")]
56354    unsafe fn test_mm_maskz_fmsub_sd() {
56355        let a = _mm_set1_pd(1.);
56356        let b = _mm_set1_pd(2.);
56357        let c = _mm_set1_pd(3.);
56358        let r = _mm_maskz_fmsub_sd(0, a, b, c);
56359        let e = _mm_set_pd(1., 0.);
56360        assert_eq_m128d(r, e);
56361        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
56362        let e = _mm_set_pd(1., -1.);
56363        assert_eq_m128d(r, e);
56364    }
56365
56366    #[simd_test(enable = "avx512f")]
56367    unsafe fn test_mm_mask3_fmsub_sd() {
56368        let a = _mm_set1_pd(1.);
56369        let b = _mm_set1_pd(2.);
56370        let c = _mm_set1_pd(3.);
56371        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
56372        assert_eq_m128d(r, c);
56373        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
56374        let e = _mm_set_pd(3., -1.);
56375        assert_eq_m128d(r, e);
56376    }
56377
56378    #[simd_test(enable = "avx512f")]
56379    unsafe fn test_mm_mask_fnmadd_ss() {
56380        let a = _mm_set1_ps(1.);
56381        let b = _mm_set1_ps(2.);
56382        let c = _mm_set1_ps(3.);
56383        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
56384        assert_eq_m128(r, a);
56385        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
56386        let e = _mm_set_ps(1., 1., 1., 1.);
56387        assert_eq_m128(r, e);
56388    }
56389
56390    #[simd_test(enable = "avx512f")]
56391    unsafe fn test_mm_maskz_fnmadd_ss() {
56392        let a = _mm_set1_ps(1.);
56393        let b = _mm_set1_ps(2.);
56394        let c = _mm_set1_ps(3.);
56395        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
56396        let e = _mm_set_ps(1., 1., 1., 0.);
56397        assert_eq_m128(r, e);
56398        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
56399        let e = _mm_set_ps(1., 1., 1., 1.);
56400        assert_eq_m128(r, e);
56401    }
56402
56403    #[simd_test(enable = "avx512f")]
56404    unsafe fn test_mm_mask3_fnmadd_ss() {
56405        let a = _mm_set1_ps(1.);
56406        let b = _mm_set1_ps(2.);
56407        let c = _mm_set1_ps(3.);
56408        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
56409        assert_eq_m128(r, c);
56410        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
56411        let e = _mm_set_ps(3., 3., 3., 1.);
56412        assert_eq_m128(r, e);
56413    }
56414
56415    #[simd_test(enable = "avx512f")]
56416    unsafe fn test_mm_mask_fnmadd_sd() {
56417        let a = _mm_set1_pd(1.);
56418        let b = _mm_set1_pd(2.);
56419        let c = _mm_set1_pd(3.);
56420        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
56421        assert_eq_m128d(r, a);
56422        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
56423        let e = _mm_set_pd(1., 1.);
56424        assert_eq_m128d(r, e);
56425    }
56426
56427    #[simd_test(enable = "avx512f")]
56428    unsafe fn test_mm_maskz_fnmadd_sd() {
56429        let a = _mm_set1_pd(1.);
56430        let b = _mm_set1_pd(2.);
56431        let c = _mm_set1_pd(3.);
56432        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
56433        let e = _mm_set_pd(1., 0.);
56434        assert_eq_m128d(r, e);
56435        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
56436        let e = _mm_set_pd(1., 1.);
56437        assert_eq_m128d(r, e);
56438    }
56439
56440    #[simd_test(enable = "avx512f")]
56441    unsafe fn test_mm_mask3_fnmadd_sd() {
56442        let a = _mm_set1_pd(1.);
56443        let b = _mm_set1_pd(2.);
56444        let c = _mm_set1_pd(3.);
56445        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
56446        assert_eq_m128d(r, c);
56447        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
56448        let e = _mm_set_pd(3., 1.);
56449        assert_eq_m128d(r, e);
56450    }
56451
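    // fnmsub computes -(a * b) - c on the lowest element, so a = 1, b = 2, c = 3 yields -5.0;
    // the upper elements follow the same mask rules as above.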
56452    #[simd_test(enable = "avx512f")]
56453    unsafe fn test_mm_mask_fnmsub_ss() {
56454        let a = _mm_set1_ps(1.);
56455        let b = _mm_set1_ps(2.);
56456        let c = _mm_set1_ps(3.);
56457        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
56458        assert_eq_m128(r, a);
56459        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
56460        let e = _mm_set_ps(1., 1., 1., -5.);
56461        assert_eq_m128(r, e);
56462    }
56463
56464    #[simd_test(enable = "avx512f")]
56465    unsafe fn test_mm_maskz_fnmsub_ss() {
56466        let a = _mm_set1_ps(1.);
56467        let b = _mm_set1_ps(2.);
56468        let c = _mm_set1_ps(3.);
56469        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
56470        let e = _mm_set_ps(1., 1., 1., 0.);
56471        assert_eq_m128(r, e);
56472        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
56473        let e = _mm_set_ps(1., 1., 1., -5.);
56474        assert_eq_m128(r, e);
56475    }
56476
56477    #[simd_test(enable = "avx512f")]
56478    unsafe fn test_mm_mask3_fnmsub_ss() {
56479        let a = _mm_set1_ps(1.);
56480        let b = _mm_set1_ps(2.);
56481        let c = _mm_set1_ps(3.);
56482        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
56483        assert_eq_m128(r, c);
56484        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
56485        let e = _mm_set_ps(3., 3., 3., -5.);
56486        assert_eq_m128(r, e);
56487    }
56488
56489    #[simd_test(enable = "avx512f")]
56490    unsafe fn test_mm_mask_fnmsub_sd() {
56491        let a = _mm_set1_pd(1.);
56492        let b = _mm_set1_pd(2.);
56493        let c = _mm_set1_pd(3.);
56494        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
56495        assert_eq_m128d(r, a);
56496        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
56497        let e = _mm_set_pd(1., -5.);
56498        assert_eq_m128d(r, e);
56499    }
56500
56501    #[simd_test(enable = "avx512f")]
56502    unsafe fn test_mm_maskz_fnmsub_sd() {
56503        let a = _mm_set1_pd(1.);
56504        let b = _mm_set1_pd(2.);
56505        let c = _mm_set1_pd(3.);
56506        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
56507        let e = _mm_set_pd(1., 0.);
56508        assert_eq_m128d(r, e);
56509        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
56510        let e = _mm_set_pd(1., -5.);
56511        assert_eq_m128d(r, e);
56512    }
56513
56514    #[simd_test(enable = "avx512f")]
56515    unsafe fn test_mm_mask3_fnmsub_sd() {
56516        let a = _mm_set1_pd(1.);
56517        let b = _mm_set1_pd(2.);
56518        let c = _mm_set1_pd(3.);
56519        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
56520        assert_eq_m128d(r, c);
56521        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
56522        let e = _mm_set_pd(3., -5.);
56523        assert_eq_m128d(r, e);
56524    }
56525
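    // The add/sub/mul/div `_round` tests that follow pass an explicit rounding mode:
    // `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC` selects round-toward-zero with exceptions
    // suppressed. Only the lowest element is computed; the remaining elements are copied
    // from `a` (or taken from `src`/zero for the mask and maskz variants).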
56526    #[simd_test(enable = "avx512f")]
56527    unsafe fn test_mm_add_round_ss() {
56528        let a = _mm_set_ps(1., 2., 10., 20.);
56529        let b = _mm_set_ps(3., 4., 30., 40.);
56530        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56531        let e = _mm_set_ps(1., 2., 10., 60.);
56532        assert_eq_m128(r, e);
56533    }
56534
56535    #[simd_test(enable = "avx512f")]
56536    unsafe fn test_mm_mask_add_round_ss() {
56537        let src = _mm_set_ps(10., 11., 100., 110.);
56538        let a = _mm_set_ps(1., 2., 10., 20.);
56539        let b = _mm_set_ps(3., 4., 30., 40.);
56540        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56541        let e = _mm_set_ps(1., 2., 10., 110.);
56542        assert_eq_m128(r, e);
56543        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56544            src, 0b11111111, a, b,
56545        );
56546        let e = _mm_set_ps(1., 2., 10., 60.);
56547        assert_eq_m128(r, e);
56548    }
56549
56550    #[simd_test(enable = "avx512f")]
56551    unsafe fn test_mm_maskz_add_round_ss() {
56552        let a = _mm_set_ps(1., 2., 10., 20.);
56553        let b = _mm_set_ps(3., 4., 30., 40.);
56554        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56555        let e = _mm_set_ps(1., 2., 10., 0.);
56556        assert_eq_m128(r, e);
56557        let r =
56558            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56559        let e = _mm_set_ps(1., 2., 10., 60.);
56560        assert_eq_m128(r, e);
56561    }
56562
56563    #[simd_test(enable = "avx512f")]
56564    unsafe fn test_mm_add_round_sd() {
56565        let a = _mm_set_pd(1., 2.);
56566        let b = _mm_set_pd(3., 4.);
56567        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56568        let e = _mm_set_pd(1., 6.);
56569        assert_eq_m128d(r, e);
56570    }
56571
56572    #[simd_test(enable = "avx512f")]
56573    unsafe fn test_mm_mask_add_round_sd() {
56574        let src = _mm_set_pd(10., 11.);
56575        let a = _mm_set_pd(1., 2.);
56576        let b = _mm_set_pd(3., 4.);
56577        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56578        let e = _mm_set_pd(1., 11.);
56579        assert_eq_m128d(r, e);
56580        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56581            src, 0b11111111, a, b,
56582        );
56583        let e = _mm_set_pd(1., 6.);
56584        assert_eq_m128d(r, e);
56585    }
56586
56587    #[simd_test(enable = "avx512f")]
56588    unsafe fn test_mm_maskz_add_round_sd() {
56589        let a = _mm_set_pd(1., 2.);
56590        let b = _mm_set_pd(3., 4.);
56591        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56592        let e = _mm_set_pd(1., 0.);
56593        assert_eq_m128d(r, e);
56594        let r =
56595            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56596        let e = _mm_set_pd(1., 6.);
56597        assert_eq_m128d(r, e);
56598    }
56599
56600    #[simd_test(enable = "avx512f")]
56601    unsafe fn test_mm_sub_round_ss() {
56602        let a = _mm_set_ps(1., 2., 10., 20.);
56603        let b = _mm_set_ps(3., 4., 30., 40.);
56604        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56605        let e = _mm_set_ps(1., 2., 10., -20.);
56606        assert_eq_m128(r, e);
56607    }
56608
56609    #[simd_test(enable = "avx512f")]
56610    unsafe fn test_mm_mask_sub_round_ss() {
56611        let src = _mm_set_ps(10., 11., 100., 110.);
56612        let a = _mm_set_ps(1., 2., 10., 20.);
56613        let b = _mm_set_ps(3., 4., 30., 40.);
56614        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56615        let e = _mm_set_ps(1., 2., 10., 110.);
56616        assert_eq_m128(r, e);
56617        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56618            src, 0b11111111, a, b,
56619        );
56620        let e = _mm_set_ps(1., 2., 10., -20.);
56621        assert_eq_m128(r, e);
56622    }
56623
56624    #[simd_test(enable = "avx512f")]
56625    unsafe fn test_mm_maskz_sub_round_ss() {
56626        let a = _mm_set_ps(1., 2., 10., 20.);
56627        let b = _mm_set_ps(3., 4., 30., 40.);
56628        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56629        let e = _mm_set_ps(1., 2., 10., 0.);
56630        assert_eq_m128(r, e);
56631        let r =
56632            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56633        let e = _mm_set_ps(1., 2., 10., -20.);
56634        assert_eq_m128(r, e);
56635    }
56636
56637    #[simd_test(enable = "avx512f")]
56638    unsafe fn test_mm_sub_round_sd() {
56639        let a = _mm_set_pd(1., 2.);
56640        let b = _mm_set_pd(3., 4.);
56641        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56642        let e = _mm_set_pd(1., -2.);
56643        assert_eq_m128d(r, e);
56644    }
56645
56646    #[simd_test(enable = "avx512f")]
56647    unsafe fn test_mm_mask_sub_round_sd() {
56648        let src = _mm_set_pd(10., 11.);
56649        let a = _mm_set_pd(1., 2.);
56650        let b = _mm_set_pd(3., 4.);
56651        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56652        let e = _mm_set_pd(1., 11.);
56653        assert_eq_m128d(r, e);
56654        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56655            src, 0b11111111, a, b,
56656        );
56657        let e = _mm_set_pd(1., -2.);
56658        assert_eq_m128d(r, e);
56659    }
56660
56661    #[simd_test(enable = "avx512f")]
56662    unsafe fn test_mm_maskz_sub_round_sd() {
56663        let a = _mm_set_pd(1., 2.);
56664        let b = _mm_set_pd(3., 4.);
56665        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56666        let e = _mm_set_pd(1., 0.);
56667        assert_eq_m128d(r, e);
56668        let r =
56669            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56670        let e = _mm_set_pd(1., -2.);
56671        assert_eq_m128d(r, e);
56672    }
56673
56674    #[simd_test(enable = "avx512f")]
56675    unsafe fn test_mm_mul_round_ss() {
56676        let a = _mm_set_ps(1., 2., 10., 20.);
56677        let b = _mm_set_ps(3., 4., 30., 40.);
56678        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56679        let e = _mm_set_ps(1., 2., 10., 800.);
56680        assert_eq_m128(r, e);
56681    }
56682
56683    #[simd_test(enable = "avx512f")]
56684    unsafe fn test_mm_mask_mul_round_ss() {
56685        let src = _mm_set_ps(10., 11., 100., 110.);
56686        let a = _mm_set_ps(1., 2., 10., 20.);
56687        let b = _mm_set_ps(3., 4., 30., 40.);
56688        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56689        let e = _mm_set_ps(1., 2., 10., 110.);
56690        assert_eq_m128(r, e);
56691        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56692            src, 0b11111111, a, b,
56693        );
56694        let e = _mm_set_ps(1., 2., 10., 800.);
56695        assert_eq_m128(r, e);
56696    }
56697
56698    #[simd_test(enable = "avx512f")]
56699    unsafe fn test_mm_maskz_mul_round_ss() {
56700        let a = _mm_set_ps(1., 2., 10., 20.);
56701        let b = _mm_set_ps(3., 4., 30., 40.);
56702        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56703        let e = _mm_set_ps(1., 2., 10., 0.);
56704        assert_eq_m128(r, e);
56705        let r =
56706            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56707        let e = _mm_set_ps(1., 2., 10., 800.);
56708        assert_eq_m128(r, e);
56709    }
56710
56711    #[simd_test(enable = "avx512f")]
56712    unsafe fn test_mm_mul_round_sd() {
56713        let a = _mm_set_pd(1., 2.);
56714        let b = _mm_set_pd(3., 4.);
56715        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56716        let e = _mm_set_pd(1., 8.);
56717        assert_eq_m128d(r, e);
56718    }
56719
56720    #[simd_test(enable = "avx512f")]
56721    unsafe fn test_mm_mask_mul_round_sd() {
56722        let src = _mm_set_pd(10., 11.);
56723        let a = _mm_set_pd(1., 2.);
56724        let b = _mm_set_pd(3., 4.);
56725        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56726        let e = _mm_set_pd(1., 11.);
56727        assert_eq_m128d(r, e);
56728        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56729            src, 0b11111111, a, b,
56730        );
56731        let e = _mm_set_pd(1., 8.);
56732        assert_eq_m128d(r, e);
56733    }
56734
56735    #[simd_test(enable = "avx512f")]
56736    unsafe fn test_mm_maskz_mul_round_sd() {
56737        let a = _mm_set_pd(1., 2.);
56738        let b = _mm_set_pd(3., 4.);
56739        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56740        let e = _mm_set_pd(1., 0.);
56741        assert_eq_m128d(r, e);
56742        let r =
56743            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56744        let e = _mm_set_pd(1., 8.);
56745        assert_eq_m128d(r, e);
56746    }
56747
56748    #[simd_test(enable = "avx512f")]
56749    unsafe fn test_mm_div_round_ss() {
56750        let a = _mm_set_ps(1., 2., 10., 20.);
56751        let b = _mm_set_ps(3., 4., 30., 40.);
56752        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56753        let e = _mm_set_ps(1., 2., 10., 0.5);
56754        assert_eq_m128(r, e);
56755    }
56756
56757    #[simd_test(enable = "avx512f")]
56758    unsafe fn test_mm_mask_div_round_ss() {
56759        let src = _mm_set_ps(10., 11., 100., 110.);
56760        let a = _mm_set_ps(1., 2., 10., 20.);
56761        let b = _mm_set_ps(3., 4., 30., 40.);
56762        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56763        let e = _mm_set_ps(1., 2., 10., 110.);
56764        assert_eq_m128(r, e);
56765        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56766            src, 0b11111111, a, b,
56767        );
56768        let e = _mm_set_ps(1., 2., 10., 0.5);
56769        assert_eq_m128(r, e);
56770    }
56771
56772    #[simd_test(enable = "avx512f")]
56773    unsafe fn test_mm_maskz_div_round_ss() {
56774        let a = _mm_set_ps(1., 2., 10., 20.);
56775        let b = _mm_set_ps(3., 4., 30., 40.);
56776        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56777        let e = _mm_set_ps(1., 2., 10., 0.);
56778        assert_eq_m128(r, e);
56779        let r =
56780            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56781        let e = _mm_set_ps(1., 2., 10., 0.5);
56782        assert_eq_m128(r, e);
56783    }
56784
56785    #[simd_test(enable = "avx512f")]
56786    unsafe fn test_mm_div_round_sd() {
56787        let a = _mm_set_pd(1., 2.);
56788        let b = _mm_set_pd(3., 4.);
56789        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56790        let e = _mm_set_pd(1., 0.5);
56791        assert_eq_m128d(r, e);
56792    }
56793
56794    #[simd_test(enable = "avx512f")]
56795    unsafe fn test_mm_mask_div_round_sd() {
56796        let src = _mm_set_pd(10., 11.);
56797        let a = _mm_set_pd(1., 2.);
56798        let b = _mm_set_pd(3., 4.);
56799        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56800        let e = _mm_set_pd(1., 11.);
56801        assert_eq_m128d(r, e);
56802        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56803            src, 0b11111111, a, b,
56804        );
56805        let e = _mm_set_pd(1., 0.5);
56806        assert_eq_m128d(r, e);
56807    }
56808
56809    #[simd_test(enable = "avx512f")]
56810    unsafe fn test_mm_maskz_div_round_sd() {
56811        let a = _mm_set_pd(1., 2.);
56812        let b = _mm_set_pd(3., 4.);
56813        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56814        let e = _mm_set_pd(1., 0.);
56815        assert_eq_m128d(r, e);
56816        let r =
56817            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56818        let e = _mm_set_pd(1., 0.5);
56819        assert_eq_m128d(r, e);
56820    }
56821
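    // max/min take a SAE argument rather than a rounding mode; `_MM_FROUND_CUR_DIRECTION`
    // keeps the current MXCSR behaviour. Again only the lowest element is compared.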
56822    #[simd_test(enable = "avx512f")]
56823    unsafe fn test_mm_max_round_ss() {
56824        let a = _mm_set_ps(0., 1., 2., 3.);
56825        let b = _mm_set_ps(4., 5., 6., 7.);
56826        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
56827        let e = _mm_set_ps(0., 1., 2., 7.);
56828        assert_eq_m128(r, e);
56829    }
56830
56831    #[simd_test(enable = "avx512f")]
56832    unsafe fn test_mm_mask_max_round_ss() {
56833        let a = _mm_set_ps(0., 1., 2., 3.);
56834        let b = _mm_set_ps(4., 5., 6., 7.);
56835        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56836        let e = _mm_set_ps(0., 1., 2., 3.);
56837        assert_eq_m128(r, e);
56838        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56839        let e = _mm_set_ps(0., 1., 2., 7.);
56840        assert_eq_m128(r, e);
56841    }
56842
56843    #[simd_test(enable = "avx512f")]
56844    unsafe fn test_mm_maskz_max_round_ss() {
56845        let a = _mm_set_ps(0., 1., 2., 3.);
56846        let b = _mm_set_ps(4., 5., 6., 7.);
56847        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56848        let e = _mm_set_ps(0., 1., 2., 0.);
56849        assert_eq_m128(r, e);
56850        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56851        let e = _mm_set_ps(0., 1., 2., 7.);
56852        assert_eq_m128(r, e);
56853    }
56854
56855    #[simd_test(enable = "avx512f")]
56856    unsafe fn test_mm_max_round_sd() {
56857        let a = _mm_set_pd(0., 1.);
56858        let b = _mm_set_pd(2., 3.);
56859        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
56860        let e = _mm_set_pd(0., 3.);
56861        assert_eq_m128d(r, e);
56862    }
56863
56864    #[simd_test(enable = "avx512f")]
56865    unsafe fn test_mm_mask_max_round_sd() {
56866        let a = _mm_set_pd(0., 1.);
56867        let b = _mm_set_pd(2., 3.);
56868        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56869        let e = _mm_set_pd(0., 1.);
56870        assert_eq_m128d(r, e);
56871        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56872        let e = _mm_set_pd(0., 3.);
56873        assert_eq_m128d(r, e);
56874    }
56875
56876    #[simd_test(enable = "avx512f")]
56877    unsafe fn test_mm_maskz_max_round_sd() {
56878        let a = _mm_set_pd(0., 1.);
56879        let b = _mm_set_pd(2., 3.);
56880        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56881        let e = _mm_set_pd(0., 0.);
56882        assert_eq_m128d(r, e);
56883        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56884        let e = _mm_set_pd(0., 3.);
56885        assert_eq_m128d(r, e);
56886    }
56887
56888    #[simd_test(enable = "avx512f")]
56889    unsafe fn test_mm_min_round_ss() {
56890        let a = _mm_set_ps(0., 1., 2., 3.);
56891        let b = _mm_set_ps(4., 5., 6., 7.);
56892        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
56893        let e = _mm_set_ps(0., 1., 2., 3.);
56894        assert_eq_m128(r, e);
56895    }
56896
56897    #[simd_test(enable = "avx512f")]
56898    unsafe fn test_mm_mask_min_round_ss() {
56899        let a = _mm_set_ps(0., 1., 2., 3.);
56900        let b = _mm_set_ps(4., 5., 6., 7.);
56901        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56902        let e = _mm_set_ps(0., 1., 2., 3.);
56903        assert_eq_m128(r, e);
56904        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56905        let e = _mm_set_ps(0., 1., 2., 3.);
56906        assert_eq_m128(r, e);
56907    }
56908
56909    #[simd_test(enable = "avx512f")]
56910    unsafe fn test_mm_maskz_min_round_ss() {
56911        let a = _mm_set_ps(0., 1., 2., 3.);
56912        let b = _mm_set_ps(4., 5., 6., 7.);
56913        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56914        let e = _mm_set_ps(0., 1., 2., 0.);
56915        assert_eq_m128(r, e);
56916        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56917        let e = _mm_set_ps(0., 1., 2., 3.);
56918        assert_eq_m128(r, e);
56919    }
56920
56921    #[simd_test(enable = "avx512f")]
56922    unsafe fn test_mm_min_round_sd() {
56923        let a = _mm_set_pd(0., 1.);
56924        let b = _mm_set_pd(2., 3.);
56925        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
56926        let e = _mm_set_pd(0., 1.);
56927        assert_eq_m128d(r, e);
56928    }
56929
56930    #[simd_test(enable = "avx512f")]
56931    unsafe fn test_mm_mask_min_round_sd() {
56932        let a = _mm_set_pd(0., 1.);
56933        let b = _mm_set_pd(2., 3.);
56934        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
56935        let e = _mm_set_pd(0., 1.);
56936        assert_eq_m128d(r, e);
56937        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
56938        let e = _mm_set_pd(0., 1.);
56939        assert_eq_m128d(r, e);
56940    }
56941
56942    #[simd_test(enable = "avx512f")]
56943    unsafe fn test_mm_maskz_min_round_sd() {
56944        let a = _mm_set_pd(0., 1.);
56945        let b = _mm_set_pd(2., 3.);
56946        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
56947        let e = _mm_set_pd(0., 0.);
56948        assert_eq_m128d(r, e);
56949        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
56950        let e = _mm_set_pd(0., 1.);
56951        assert_eq_m128d(r, e);
56952    }
56953
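    // sqrt_round takes the square root of the lowest element of `b` (here 4.0 -> 2.0) and
    // copies the upper elements from `a`.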
56954    #[simd_test(enable = "avx512f")]
56955    unsafe fn test_mm_sqrt_round_ss() {
56956        let a = _mm_set_ps(1., 2., 10., 20.);
56957        let b = _mm_set_ps(3., 4., 30., 4.);
56958        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56959        let e = _mm_set_ps(1., 2., 10., 2.);
56960        assert_eq_m128(r, e);
56961    }
56962
56963    #[simd_test(enable = "avx512f")]
56964    unsafe fn test_mm_mask_sqrt_round_ss() {
56965        let src = _mm_set_ps(10., 11., 100., 110.);
56966        let a = _mm_set_ps(1., 2., 10., 20.);
56967        let b = _mm_set_ps(3., 4., 30., 4.);
56968        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
56969        let e = _mm_set_ps(1., 2., 10., 110.);
56970        assert_eq_m128(r, e);
56971        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
56972            src, 0b11111111, a, b,
56973        );
56974        let e = _mm_set_ps(1., 2., 10., 2.);
56975        assert_eq_m128(r, e);
56976    }
56977
56978    #[simd_test(enable = "avx512f")]
56979    unsafe fn test_mm_maskz_sqrt_round_ss() {
56980        let a = _mm_set_ps(1., 2., 10., 20.);
56981        let b = _mm_set_ps(3., 4., 30., 4.);
56982        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
56983        let e = _mm_set_ps(1., 2., 10., 0.);
56984        assert_eq_m128(r, e);
56985        let r =
56986            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
56987        let e = _mm_set_ps(1., 2., 10., 2.);
56988        assert_eq_m128(r, e);
56989    }
56990
56991    #[simd_test(enable = "avx512f")]
56992    unsafe fn test_mm_sqrt_round_sd() {
56993        let a = _mm_set_pd(1., 2.);
56994        let b = _mm_set_pd(3., 4.);
56995        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
56996        let e = _mm_set_pd(1., 2.);
56997        assert_eq_m128d(r, e);
56998    }
56999
57000    #[simd_test(enable = "avx512f")]
57001    unsafe fn test_mm_mask_sqrt_round_sd() {
57002        let src = _mm_set_pd(10., 11.);
57003        let a = _mm_set_pd(1., 2.);
57004        let b = _mm_set_pd(3., 4.);
57005        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
57006        let e = _mm_set_pd(1., 11.);
57007        assert_eq_m128d(r, e);
57008        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
57009            src, 0b11111111, a, b,
57010        );
57011        let e = _mm_set_pd(1., 2.);
57012        assert_eq_m128d(r, e);
57013    }
57014
57015    #[simd_test(enable = "avx512f")]
57016    unsafe fn test_mm_maskz_sqrt_round_sd() {
57017        let a = _mm_set_pd(1., 2.);
57018        let b = _mm_set_pd(3., 4.);
57019        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
57020        let e = _mm_set_pd(1., 0.);
57021        assert_eq_m128d(r, e);
57022        let r =
57023            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
57024        let e = _mm_set_pd(1., 2.);
57025        assert_eq_m128d(r, e);
57026    }
57027
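    // getexp returns floor(log2(|x|)) of the lowest element as a float, so getexp(3.0) == 1.0.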
57028    #[simd_test(enable = "avx512f")]
57029    unsafe fn test_mm_getexp_round_ss() {
57030        let a = _mm_set1_ps(2.);
57031        let b = _mm_set1_ps(3.);
57032        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
57033        let e = _mm_set_ps(2., 2., 2., 1.);
57034        assert_eq_m128(r, e);
57035    }
57036
57037    #[simd_test(enable = "avx512f")]
57038    unsafe fn test_mm_mask_getexp_round_ss() {
57039        let a = _mm_set1_ps(2.);
57040        let b = _mm_set1_ps(3.);
57041        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57042        let e = _mm_set_ps(2., 2., 2., 2.);
57043        assert_eq_m128(r, e);
57044        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57045        let e = _mm_set_ps(2., 2., 2., 1.);
57046        assert_eq_m128(r, e);
57047    }
57048
57049    #[simd_test(enable = "avx512f")]
57050    unsafe fn test_mm_maskz_getexp_round_ss() {
57051        let a = _mm_set1_ps(2.);
57052        let b = _mm_set1_ps(3.);
57053        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
57054        let e = _mm_set_ps(2., 2., 2., 0.);
57055        assert_eq_m128(r, e);
57056        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57057        let e = _mm_set_ps(2., 2., 2., 1.);
57058        assert_eq_m128(r, e);
57059    }
57060
57061    #[simd_test(enable = "avx512f")]
57062    unsafe fn test_mm_getexp_round_sd() {
57063        let a = _mm_set1_pd(2.);
57064        let b = _mm_set1_pd(3.);
57065        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
57066        let e = _mm_set_pd(2., 1.);
57067        assert_eq_m128d(r, e);
57068    }
57069
57070    #[simd_test(enable = "avx512f")]
57071    unsafe fn test_mm_mask_getexp_round_sd() {
57072        let a = _mm_set1_pd(2.);
57073        let b = _mm_set1_pd(3.);
57074        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57075        let e = _mm_set_pd(2., 2.);
57076        assert_eq_m128d(r, e);
57077        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57078        let e = _mm_set_pd(2., 1.);
57079        assert_eq_m128d(r, e);
57080    }
57081
57082    #[simd_test(enable = "avx512f")]
57083    unsafe fn test_mm_maskz_getexp_round_sd() {
57084        let a = _mm_set1_pd(2.);
57085        let b = _mm_set1_pd(3.);
57086        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
57087        let e = _mm_set_pd(2., 0.);
57088        assert_eq_m128d(r, e);
57089        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57090        let e = _mm_set_pd(2., 1.);
57091        assert_eq_m128d(r, e);
57092    }
57093
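    // getmant normalises the mantissa of the lowest element into the `_MM_MANT_NORM_1_2`
    // interval [1, 2): 10.0 == 1.25 * 2^3, hence the expected 1.25.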
57094    #[simd_test(enable = "avx512f")]
57095    unsafe fn test_mm_getmant_round_ss() {
57096        let a = _mm_set1_ps(20.);
57097        let b = _mm_set1_ps(10.);
57098        let r =
57099            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
57100                a, b,
57101            );
57102        let e = _mm_set_ps(20., 20., 20., 1.25);
57103        assert_eq_m128(r, e);
57104    }
57105
57106    #[simd_test(enable = "avx512f")]
57107    unsafe fn test_mm_mask_getmant_round_ss() {
57108        let a = _mm_set1_ps(20.);
57109        let b = _mm_set1_ps(10.);
57110        let r = _mm_mask_getmant_round_ss::<
57111            _MM_MANT_NORM_1_2,
57112            _MM_MANT_SIGN_SRC,
57113            _MM_FROUND_CUR_DIRECTION,
57114        >(a, 0, a, b);
57115        let e = _mm_set_ps(20., 20., 20., 20.);
57116        assert_eq_m128(r, e);
57117        let r = _mm_mask_getmant_round_ss::<
57118            _MM_MANT_NORM_1_2,
57119            _MM_MANT_SIGN_SRC,
57120            _MM_FROUND_CUR_DIRECTION,
57121        >(a, 0b11111111, a, b);
57122        let e = _mm_set_ps(20., 20., 20., 1.25);
57123        assert_eq_m128(r, e);
57124    }
57125
57126    #[simd_test(enable = "avx512f")]
57127    unsafe fn test_mm_maskz_getmant_round_ss() {
57128        let a = _mm_set1_ps(20.);
57129        let b = _mm_set1_ps(10.);
57130        let r = _mm_maskz_getmant_round_ss::<
57131            _MM_MANT_NORM_1_2,
57132            _MM_MANT_SIGN_SRC,
57133            _MM_FROUND_CUR_DIRECTION,
57134        >(0, a, b);
57135        let e = _mm_set_ps(20., 20., 20., 0.);
57136        assert_eq_m128(r, e);
57137        let r = _mm_maskz_getmant_round_ss::<
57138            _MM_MANT_NORM_1_2,
57139            _MM_MANT_SIGN_SRC,
57140            _MM_FROUND_CUR_DIRECTION,
57141        >(0b11111111, a, b);
57142        let e = _mm_set_ps(20., 20., 20., 1.25);
57143        assert_eq_m128(r, e);
57144    }
57145
57146    #[simd_test(enable = "avx512f")]
57147    unsafe fn test_mm_getmant_round_sd() {
57148        let a = _mm_set1_pd(20.);
57149        let b = _mm_set1_pd(10.);
57150        let r =
57151            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
57152                a, b,
57153            );
57154        let e = _mm_set_pd(20., 1.25);
57155        assert_eq_m128d(r, e);
57156    }
57157
57158    #[simd_test(enable = "avx512f")]
57159    unsafe fn test_mm_mask_getmant_round_sd() {
57160        let a = _mm_set1_pd(20.);
57161        let b = _mm_set1_pd(10.);
57162        let r = _mm_mask_getmant_round_sd::<
57163            _MM_MANT_NORM_1_2,
57164            _MM_MANT_SIGN_SRC,
57165            _MM_FROUND_CUR_DIRECTION,
57166        >(a, 0, a, b);
57167        let e = _mm_set_pd(20., 20.);
57168        assert_eq_m128d(r, e);
57169        let r = _mm_mask_getmant_round_sd::<
57170            _MM_MANT_NORM_1_2,
57171            _MM_MANT_SIGN_SRC,
57172            _MM_FROUND_CUR_DIRECTION,
57173        >(a, 0b11111111, a, b);
57174        let e = _mm_set_pd(20., 1.25);
57175        assert_eq_m128d(r, e);
57176    }
57177
57178    #[simd_test(enable = "avx512f")]
57179    unsafe fn test_mm_maskz_getmant_round_sd() {
57180        let a = _mm_set1_pd(20.);
57181        let b = _mm_set1_pd(10.);
57182        let r = _mm_maskz_getmant_round_sd::<
57183            _MM_MANT_NORM_1_2,
57184            _MM_MANT_SIGN_SRC,
57185            _MM_FROUND_CUR_DIRECTION,
57186        >(0, a, b);
57187        let e = _mm_set_pd(20., 0.);
57188        assert_eq_m128d(r, e);
57189        let r = _mm_maskz_getmant_round_sd::<
57190            _MM_MANT_NORM_1_2,
57191            _MM_MANT_SIGN_SRC,
57192            _MM_FROUND_CUR_DIRECTION,
57193        >(0b11111111, a, b);
57194        let e = _mm_set_pd(20., 1.25);
57195        assert_eq_m128d(r, e);
57196    }
57197
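    // roundscale rounds the lowest element to 2^-M fractional bits, with M taken from the
    // upper bits of imm8; imm8 = 0 rounds to an integer, so 1.1 becomes 1.0.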
57198    #[simd_test(enable = "avx512f")]
57199    unsafe fn test_mm_roundscale_round_ss() {
57200        let a = _mm_set1_ps(2.2);
57201        let b = _mm_set1_ps(1.1);
57202        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
57203        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
57204        assert_eq_m128(r, e);
57205    }
57206
57207    #[simd_test(enable = "avx512f")]
57208    unsafe fn test_mm_mask_roundscale_round_ss() {
57209        let a = _mm_set1_ps(2.2);
57210        let b = _mm_set1_ps(1.1);
57211        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57212        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
57213        assert_eq_m128(r, e);
57214        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57215        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
57216        assert_eq_m128(r, e);
57217    }
57218
57219    #[simd_test(enable = "avx512f")]
57220    unsafe fn test_mm_maskz_roundscale_round_ss() {
57221        let a = _mm_set1_ps(2.2);
57222        let b = _mm_set1_ps(1.1);
57223        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
57224        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
57225        assert_eq_m128(r, e);
57226        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57227        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
57228        assert_eq_m128(r, e);
57229    }
57230
57231    #[simd_test(enable = "avx512f")]
57232    unsafe fn test_mm_roundscale_round_sd() {
57233        let a = _mm_set1_pd(2.2);
57234        let b = _mm_set1_pd(1.1);
57235        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
57236        let e = _mm_set_pd(2.2, 1.0);
57237        assert_eq_m128d(r, e);
57238    }
57239
57240    #[simd_test(enable = "avx512f")]
57241    unsafe fn test_mm_mask_roundscale_round_sd() {
57242        let a = _mm_set1_pd(2.2);
57243        let b = _mm_set1_pd(1.1);
57244        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
57245        let e = _mm_set_pd(2.2, 2.2);
57246        assert_eq_m128d(r, e);
57247        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
57248        let e = _mm_set_pd(2.2, 1.0);
57249        assert_eq_m128d(r, e);
57250    }
57251
57252    #[simd_test(enable = "avx512f")]
57253    unsafe fn test_mm_maskz_roundscale_round_sd() {
57254        let a = _mm_set1_pd(2.2);
57255        let b = _mm_set1_pd(1.1);
57256        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
57257        let e = _mm_set_pd(2.2, 0.0);
57258        assert_eq_m128d(r, e);
57259        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
57260        let e = _mm_set_pd(2.2, 1.0);
57261        assert_eq_m128d(r, e);
57262    }
57263
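    // scalef computes a * 2^floor(b) on the lowest element: 1.0 * 2^3 == 8.0.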
57264    #[simd_test(enable = "avx512f")]
57265    unsafe fn test_mm_scalef_round_ss() {
57266        let a = _mm_set1_ps(1.);
57267        let b = _mm_set1_ps(3.);
57268        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
57269        let e = _mm_set_ps(1., 1., 1., 8.);
57270        assert_eq_m128(r, e);
57271    }
57272
57273    #[simd_test(enable = "avx512f")]
57274    unsafe fn test_mm_mask_scalef_round_ss() {
57275        let a = _mm_set1_ps(1.);
57276        let b = _mm_set1_ps(3.);
57277        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57278            a, 0, a, b,
57279        );
57280        let e = _mm_set_ps(1., 1., 1., 1.);
57281        assert_eq_m128(r, e);
57282        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57283            a, 0b11111111, a, b,
57284        );
57285        let e = _mm_set_ps(1., 1., 1., 8.);
57286        assert_eq_m128(r, e);
57287    }
57288
57289    #[simd_test(enable = "avx512f")]
57290    unsafe fn test_mm_maskz_scalef_round_ss() {
57291        let a = _mm_set1_ps(1.);
57292        let b = _mm_set1_ps(3.);
57293        let r =
57294            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
57295        let e = _mm_set_ps(1., 1., 1., 0.);
57296        assert_eq_m128(r, e);
57297        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57298            0b11111111, a, b,
57299        );
57300        let e = _mm_set_ps(1., 1., 1., 8.);
57301        assert_eq_m128(r, e);
57302    }
57303
57304    #[simd_test(enable = "avx512f")]
57305    unsafe fn test_mm_scalef_round_sd() {
57306        let a = _mm_set1_pd(1.);
57307        let b = _mm_set1_pd(3.);
57308        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
57309        let e = _mm_set_pd(1., 8.);
57310        assert_eq_m128d(r, e);
57311    }
57312
57313    #[simd_test(enable = "avx512f")]
57314    unsafe fn test_mm_mask_scalef_round_sd() {
57315        let a = _mm_set1_pd(1.);
57316        let b = _mm_set1_pd(3.);
57317        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57318            a, 0, a, b,
57319        );
57320        let e = _mm_set_pd(1., 1.);
57321        assert_eq_m128d(r, e);
57322        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57323            a, 0b11111111, a, b,
57324        );
57325        let e = _mm_set_pd(1., 8.);
57326        assert_eq_m128d(r, e);
57327    }
57328
57329    #[simd_test(enable = "avx512f")]
57330    unsafe fn test_mm_maskz_scalef_round_sd() {
57331        let a = _mm_set1_pd(1.);
57332        let b = _mm_set1_pd(3.);
57333        let r =
57334            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
57335        let e = _mm_set_pd(1., 0.);
57336        assert_eq_m128d(r, e);
57337        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57338            0b11111111, a, b,
57339        );
57340        let e = _mm_set_pd(1., 8.);
57341        assert_eq_m128d(r, e);
57342    }
57343
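    // The remaining `_round` FMA tests mirror the non-round ones above but with an explicit
    // rounding mode: fmadd = a*b + c (5), fmsub = a*b - c (-1), fnmadd = -(a*b) + c (1) and
    // fnmsub = -(a*b) - c (-5), each evaluated only on the lowest element.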
57344    #[simd_test(enable = "avx512f")]
57345    unsafe fn test_mm_fmadd_round_ss() {
57346        let a = _mm_set1_ps(1.);
57347        let b = _mm_set1_ps(2.);
57348        let c = _mm_set1_ps(3.);
57349        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57350        let e = _mm_set_ps(1., 1., 1., 5.);
57351        assert_eq_m128(r, e);
57352    }
57353
57354    #[simd_test(enable = "avx512f")]
57355    unsafe fn test_mm_mask_fmadd_round_ss() {
57356        let a = _mm_set1_ps(1.);
57357        let b = _mm_set1_ps(2.);
57358        let c = _mm_set1_ps(3.);
57359        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57360            a, 0, b, c,
57361        );
57362        assert_eq_m128(r, a);
57363        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57364            a, 0b11111111, b, c,
57365        );
57366        let e = _mm_set_ps(1., 1., 1., 5.);
57367        assert_eq_m128(r, e);
57368    }
57369
57370    #[simd_test(enable = "avx512f")]
57371    unsafe fn test_mm_maskz_fmadd_round_ss() {
57372        let a = _mm_set1_ps(1.);
57373        let b = _mm_set1_ps(2.);
57374        let c = _mm_set1_ps(3.);
57375        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57376            0, a, b, c,
57377        );
57378        let e = _mm_set_ps(1., 1., 1., 0.);
57379        assert_eq_m128(r, e);
57380        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57381            0b11111111, a, b, c,
57382        );
57383        let e = _mm_set_ps(1., 1., 1., 5.);
57384        assert_eq_m128(r, e);
57385    }
57386
57387    #[simd_test(enable = "avx512f")]
57388    unsafe fn test_mm_mask3_fmadd_round_ss() {
57389        let a = _mm_set1_ps(1.);
57390        let b = _mm_set1_ps(2.);
57391        let c = _mm_set1_ps(3.);
57392        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57393            a, b, c, 0,
57394        );
57395        assert_eq_m128(r, c);
57396        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57397            a, b, c, 0b11111111,
57398        );
57399        let e = _mm_set_ps(3., 3., 3., 5.);
57400        assert_eq_m128(r, e);
57401    }
57402
57403    #[simd_test(enable = "avx512f")]
57404    unsafe fn test_mm_fmadd_round_sd() {
57405        let a = _mm_set1_pd(1.);
57406        let b = _mm_set1_pd(2.);
57407        let c = _mm_set1_pd(3.);
57408        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57409        let e = _mm_set_pd(1., 5.);
57410        assert_eq_m128d(r, e);
57411    }
57412
57413    #[simd_test(enable = "avx512f")]
57414    unsafe fn test_mm_mask_fmadd_round_sd() {
57415        let a = _mm_set1_pd(1.);
57416        let b = _mm_set1_pd(2.);
57417        let c = _mm_set1_pd(3.);
57418        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57419            a, 0, b, c,
57420        );
57421        assert_eq_m128d(r, a);
57422        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57423            a, 0b11111111, b, c,
57424        );
57425        let e = _mm_set_pd(1., 5.);
57426        assert_eq_m128d(r, e);
57427    }
57428
57429    #[simd_test(enable = "avx512f")]
57430    unsafe fn test_mm_maskz_fmadd_round_sd() {
57431        let a = _mm_set1_pd(1.);
57432        let b = _mm_set1_pd(2.);
57433        let c = _mm_set1_pd(3.);
57434        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57435            0, a, b, c,
57436        );
57437        let e = _mm_set_pd(1., 0.);
57438        assert_eq_m128d(r, e);
57439        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57440            0b11111111, a, b, c,
57441        );
57442        let e = _mm_set_pd(1., 5.);
57443        assert_eq_m128d(r, e);
57444    }
57445
57446    #[simd_test(enable = "avx512f")]
57447    unsafe fn test_mm_mask3_fmadd_round_sd() {
57448        let a = _mm_set1_pd(1.);
57449        let b = _mm_set1_pd(2.);
57450        let c = _mm_set1_pd(3.);
57451        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57452            a, b, c, 0,
57453        );
57454        assert_eq_m128d(r, c);
57455        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57456            a, b, c, 0b11111111,
57457        );
57458        let e = _mm_set_pd(3., 5.);
57459        assert_eq_m128d(r, e);
57460    }
57461
57462    #[simd_test(enable = "avx512f")]
57463    unsafe fn test_mm_fmsub_round_ss() {
57464        let a = _mm_set1_ps(1.);
57465        let b = _mm_set1_ps(2.);
57466        let c = _mm_set1_ps(3.);
57467        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57468        let e = _mm_set_ps(1., 1., 1., -1.);
57469        assert_eq_m128(r, e);
57470    }
57471
57472    #[simd_test(enable = "avx512f")]
57473    unsafe fn test_mm_mask_fmsub_round_ss() {
57474        let a = _mm_set1_ps(1.);
57475        let b = _mm_set1_ps(2.);
57476        let c = _mm_set1_ps(3.);
57477        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57478            a, 0, b, c,
57479        );
57480        assert_eq_m128(r, a);
57481        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57482            a, 0b11111111, b, c,
57483        );
57484        let e = _mm_set_ps(1., 1., 1., -1.);
57485        assert_eq_m128(r, e);
57486    }
57487
57488    #[simd_test(enable = "avx512f")]
57489    unsafe fn test_mm_maskz_fmsub_round_ss() {
57490        let a = _mm_set1_ps(1.);
57491        let b = _mm_set1_ps(2.);
57492        let c = _mm_set1_ps(3.);
57493        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57494            0, a, b, c,
57495        );
57496        let e = _mm_set_ps(1., 1., 1., 0.);
57497        assert_eq_m128(r, e);
57498        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57499            0b11111111, a, b, c,
57500        );
57501        let e = _mm_set_ps(1., 1., 1., -1.);
57502        assert_eq_m128(r, e);
57503    }
57504
57505    #[simd_test(enable = "avx512f")]
57506    unsafe fn test_mm_mask3_fmsub_round_ss() {
57507        let a = _mm_set1_ps(1.);
57508        let b = _mm_set1_ps(2.);
57509        let c = _mm_set1_ps(3.);
57510        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57511            a, b, c, 0,
57512        );
57513        assert_eq_m128(r, c);
57514        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57515            a, b, c, 0b11111111,
57516        );
57517        let e = _mm_set_ps(3., 3., 3., -1.);
57518        assert_eq_m128(r, e);
57519    }
57520
57521    #[simd_test(enable = "avx512f")]
57522    unsafe fn test_mm_fmsub_round_sd() {
57523        let a = _mm_set1_pd(1.);
57524        let b = _mm_set1_pd(2.);
57525        let c = _mm_set1_pd(3.);
57526        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57527        let e = _mm_set_pd(1., -1.);
57528        assert_eq_m128d(r, e);
57529    }
57530
57531    #[simd_test(enable = "avx512f")]
57532    unsafe fn test_mm_mask_fmsub_round_sd() {
57533        let a = _mm_set1_pd(1.);
57534        let b = _mm_set1_pd(2.);
57535        let c = _mm_set1_pd(3.);
57536        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57537            a, 0, b, c,
57538        );
57539        assert_eq_m128d(r, a);
57540        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57541            a, 0b11111111, b, c,
57542        );
57543        let e = _mm_set_pd(1., -1.);
57544        assert_eq_m128d(r, e);
57545    }
57546
57547    #[simd_test(enable = "avx512f")]
57548    unsafe fn test_mm_maskz_fmsub_round_sd() {
57549        let a = _mm_set1_pd(1.);
57550        let b = _mm_set1_pd(2.);
57551        let c = _mm_set1_pd(3.);
57552        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57553            0, a, b, c,
57554        );
57555        let e = _mm_set_pd(1., 0.);
57556        assert_eq_m128d(r, e);
57557        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57558            0b11111111, a, b, c,
57559        );
57560        let e = _mm_set_pd(1., -1.);
57561        assert_eq_m128d(r, e);
57562    }
57563
57564    #[simd_test(enable = "avx512f")]
57565    unsafe fn test_mm_mask3_fmsub_round_sd() {
57566        let a = _mm_set1_pd(1.);
57567        let b = _mm_set1_pd(2.);
57568        let c = _mm_set1_pd(3.);
57569        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57570            a, b, c, 0,
57571        );
57572        assert_eq_m128d(r, c);
57573        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57574            a, b, c, 0b11111111,
57575        );
57576        let e = _mm_set_pd(3., -1.);
57577        assert_eq_m128d(r, e);
57578    }
57579
57580    #[simd_test(enable = "avx512f")]
57581    unsafe fn test_mm_fnmadd_round_ss() {
57582        let a = _mm_set1_ps(1.);
57583        let b = _mm_set1_ps(2.);
57584        let c = _mm_set1_ps(3.);
57585        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57586        let e = _mm_set_ps(1., 1., 1., 1.);
57587        assert_eq_m128(r, e);
57588    }
57589
57590    #[simd_test(enable = "avx512f")]
57591    unsafe fn test_mm_mask_fnmadd_round_ss() {
57592        let a = _mm_set1_ps(1.);
57593        let b = _mm_set1_ps(2.);
57594        let c = _mm_set1_ps(3.);
57595        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57596            a, 0, b, c,
57597        );
57598        assert_eq_m128(r, a);
57599        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57600            a, 0b11111111, b, c,
57601        );
57602        let e = _mm_set_ps(1., 1., 1., 1.);
57603        assert_eq_m128(r, e);
57604    }
57605
57606    #[simd_test(enable = "avx512f")]
57607    unsafe fn test_mm_maskz_fnmadd_round_ss() {
57608        let a = _mm_set1_ps(1.);
57609        let b = _mm_set1_ps(2.);
57610        let c = _mm_set1_ps(3.);
57611        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57612            0, a, b, c,
57613        );
57614        let e = _mm_set_ps(1., 1., 1., 0.);
57615        assert_eq_m128(r, e);
57616        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57617            0b11111111, a, b, c,
57618        );
57619        let e = _mm_set_ps(1., 1., 1., 1.);
57620        assert_eq_m128(r, e);
57621    }
57622
57623    #[simd_test(enable = "avx512f")]
57624    unsafe fn test_mm_mask3_fnmadd_round_ss() {
57625        let a = _mm_set1_ps(1.);
57626        let b = _mm_set1_ps(2.);
57627        let c = _mm_set1_ps(3.);
57628        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57629            a, b, c, 0,
57630        );
57631        assert_eq_m128(r, c);
57632        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57633            a, b, c, 0b11111111,
57634        );
57635        let e = _mm_set_ps(3., 3., 3., 1.);
57636        assert_eq_m128(r, e);
57637    }
57638
57639    #[simd_test(enable = "avx512f")]
57640    unsafe fn test_mm_fnmadd_round_sd() {
57641        let a = _mm_set1_pd(1.);
57642        let b = _mm_set1_pd(2.);
57643        let c = _mm_set1_pd(3.);
57644        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57645        let e = _mm_set_pd(1., 1.);
57646        assert_eq_m128d(r, e);
57647    }
57648
57649    #[simd_test(enable = "avx512f")]
57650    unsafe fn test_mm_mask_fnmadd_round_sd() {
57651        let a = _mm_set1_pd(1.);
57652        let b = _mm_set1_pd(2.);
57653        let c = _mm_set1_pd(3.);
57654        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57655            a, 0, b, c,
57656        );
57657        assert_eq_m128d(r, a);
57658        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57659            a, 0b11111111, b, c,
57660        );
57661        let e = _mm_set_pd(1., 1.);
57662        assert_eq_m128d(r, e);
57663    }
57664
57665    #[simd_test(enable = "avx512f")]
57666    unsafe fn test_mm_maskz_fnmadd_round_sd() {
57667        let a = _mm_set1_pd(1.);
57668        let b = _mm_set1_pd(2.);
57669        let c = _mm_set1_pd(3.);
57670        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57671            0, a, b, c,
57672        );
57673        let e = _mm_set_pd(1., 0.);
57674        assert_eq_m128d(r, e);
57675        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57676            0b11111111, a, b, c,
57677        );
57678        let e = _mm_set_pd(1., 1.);
57679        assert_eq_m128d(r, e);
57680    }
57681
57682    #[simd_test(enable = "avx512f")]
57683    unsafe fn test_mm_mask3_fnmadd_round_sd() {
57684        let a = _mm_set1_pd(1.);
57685        let b = _mm_set1_pd(2.);
57686        let c = _mm_set1_pd(3.);
57687        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57688            a, b, c, 0,
57689        );
57690        assert_eq_m128d(r, c);
57691        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57692            a, b, c, 0b11111111,
57693        );
57694        let e = _mm_set_pd(3., 1.);
57695        assert_eq_m128d(r, e);
57696    }
57697
57698    #[simd_test(enable = "avx512f")]
57699    unsafe fn test_mm_fnmsub_round_ss() {
57700        let a = _mm_set1_ps(1.);
57701        let b = _mm_set1_ps(2.);
57702        let c = _mm_set1_ps(3.);
57703        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57704        let e = _mm_set_ps(1., 1., 1., -5.);
57705        assert_eq_m128(r, e);
57706    }
57707
57708    #[simd_test(enable = "avx512f")]
57709    unsafe fn test_mm_mask_fnmsub_round_ss() {
57710        let a = _mm_set1_ps(1.);
57711        let b = _mm_set1_ps(2.);
57712        let c = _mm_set1_ps(3.);
57713        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57714            a, 0, b, c,
57715        );
57716        assert_eq_m128(r, a);
57717        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57718            a, 0b11111111, b, c,
57719        );
57720        let e = _mm_set_ps(1., 1., 1., -5.);
57721        assert_eq_m128(r, e);
57722    }
57723
57724    #[simd_test(enable = "avx512f")]
57725    unsafe fn test_mm_maskz_fnmsub_round_ss() {
57726        let a = _mm_set1_ps(1.);
57727        let b = _mm_set1_ps(2.);
57728        let c = _mm_set1_ps(3.);
57729        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57730            0, a, b, c,
57731        );
57732        let e = _mm_set_ps(1., 1., 1., 0.);
57733        assert_eq_m128(r, e);
57734        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57735            0b11111111, a, b, c,
57736        );
57737        let e = _mm_set_ps(1., 1., 1., -5.);
57738        assert_eq_m128(r, e);
57739    }
57740
57741    #[simd_test(enable = "avx512f")]
57742    unsafe fn test_mm_mask3_fnmsub_round_ss() {
57743        let a = _mm_set1_ps(1.);
57744        let b = _mm_set1_ps(2.);
57745        let c = _mm_set1_ps(3.);
57746        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57747            a, b, c, 0,
57748        );
57749        assert_eq_m128(r, c);
57750        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57751            a, b, c, 0b11111111,
57752        );
57753        let e = _mm_set_ps(3., 3., 3., -5.);
57754        assert_eq_m128(r, e);
57755    }
57756
57757    #[simd_test(enable = "avx512f")]
57758    unsafe fn test_mm_fnmsub_round_sd() {
57759        let a = _mm_set1_pd(1.);
57760        let b = _mm_set1_pd(2.);
57761        let c = _mm_set1_pd(3.);
57762        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
57763        let e = _mm_set_pd(1., -5.);
57764        assert_eq_m128d(r, e);
57765    }
57766
57767    #[simd_test(enable = "avx512f")]
57768    unsafe fn test_mm_mask_fnmsub_round_sd() {
57769        let a = _mm_set1_pd(1.);
57770        let b = _mm_set1_pd(2.);
57771        let c = _mm_set1_pd(3.);
57772        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57773            a, 0, b, c,
57774        );
57775        assert_eq_m128d(r, a);
57776        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57777            a, 0b11111111, b, c,
57778        );
57779        let e = _mm_set_pd(1., -5.);
57780        assert_eq_m128d(r, e);
57781    }
57782
57783    #[simd_test(enable = "avx512f")]
57784    unsafe fn test_mm_maskz_fnmsub_round_sd() {
57785        let a = _mm_set1_pd(1.);
57786        let b = _mm_set1_pd(2.);
57787        let c = _mm_set1_pd(3.);
57788        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57789            0, a, b, c,
57790        );
57791        let e = _mm_set_pd(1., 0.);
57792        assert_eq_m128d(r, e);
57793        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57794            0b11111111, a, b, c,
57795        );
57796        let e = _mm_set_pd(1., -5.);
57797        assert_eq_m128d(r, e);
57798    }
57799
57800    #[simd_test(enable = "avx512f")]
57801    unsafe fn test_mm_mask3_fnmsub_round_sd() {
57802        let a = _mm_set1_pd(1.);
57803        let b = _mm_set1_pd(2.);
57804        let c = _mm_set1_pd(3.);
57805        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57806            a, b, c, 0,
57807        );
57808        assert_eq_m128d(r, c);
57809        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
57810            a, b, c, 0b11111111,
57811        );
57812        let e = _mm_set_pd(3., -5.);
57813        assert_eq_m128d(r, e);
57814    }
57815
57816    #[simd_test(enable = "avx512f")]
57817    unsafe fn test_mm_fixupimm_ss() {
57818        let a = _mm_set_ps(0., 0., 0., f32::NAN);
57819        let b = _mm_set1_ps(f32::MAX);
57820        let c = _mm_set1_epi32(i32::MAX);
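        // imm8 = 5 with an all-ones fixup table in c: the test expects the NaN in the
        // low lane to be fixed up to -0.0, with the upper lanes passed through from a.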
57821        let r = _mm_fixupimm_ss::<5>(a, b, c);
57822        let e = _mm_set_ps(0., 0., 0., -0.0);
57823        assert_eq_m128(r, e);
57824    }
57825
57826    #[simd_test(enable = "avx512f")]
57827    unsafe fn test_mm_mask_fixupimm_ss() {
57828        let a = _mm_set_ps(0., 0., 0., f32::NAN);
57829        let b = _mm_set1_ps(f32::MAX);
57830        let c = _mm_set1_epi32(i32::MAX);
57831        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
57832        let e = _mm_set_ps(0., 0., 0., -0.0);
57833        assert_eq_m128(r, e);
57834    }
57835
57836    #[simd_test(enable = "avx512f")]
57837    unsafe fn test_mm_maskz_fixupimm_ss() {
57838        let a = _mm_set_ps(0., 0., 0., f32::NAN);
57839        let b = _mm_set1_ps(f32::MAX);
57840        let c = _mm_set1_epi32(i32::MAX);
57841        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
57842        let e = _mm_set_ps(0., 0., 0., 0.0);
57843        assert_eq_m128(r, e);
57844        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
57845        let e = _mm_set_ps(0., 0., 0., -0.0);
57846        assert_eq_m128(r, e);
57847    }
57848
57849    #[simd_test(enable = "avx512f")]
57850    unsafe fn test_mm_fixupimm_sd() {
57851        let a = _mm_set_pd(0., f64::NAN);
57852        let b = _mm_set1_pd(f64::MAX);
57853        let c = _mm_set1_epi64x(i32::MAX as i64);
57854        let r = _mm_fixupimm_sd::<5>(a, b, c);
57855        let e = _mm_set_pd(0., -0.0);
57856        assert_eq_m128d(r, e);
57857    }
57858
57859    #[simd_test(enable = "avx512f")]
57860    unsafe fn test_mm_mask_fixupimm_sd() {
57861        let a = _mm_set_pd(0., f64::NAN);
57862        let b = _mm_set1_pd(f64::MAX);
57863        let c = _mm_set1_epi64x(i32::MAX as i64);
57864        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
57865        let e = _mm_set_pd(0., -0.0);
57866        assert_eq_m128d(r, e);
57867    }
57868
57869    #[simd_test(enable = "avx512f")]
57870    unsafe fn test_mm_maskz_fixupimm_sd() {
57871        let a = _mm_set_pd(0., f64::NAN);
57872        let b = _mm_set1_pd(f64::MAX);
57873        let c = _mm_set1_epi64x(i32::MAX as i64);
57874        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
57875        let e = _mm_set_pd(0., 0.0);
57876        assert_eq_m128d(r, e);
57877        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
57878        let e = _mm_set_pd(0., -0.0);
57879        assert_eq_m128d(r, e);
57880    }
57881
57882    #[simd_test(enable = "avx512f")]
57883    unsafe fn test_mm_fixupimm_round_ss() {
57884        let a = _mm_set_ps(1., 0., 0., f32::NAN);
57885        let b = _mm_set1_ps(f32::MAX);
57886        let c = _mm_set1_epi32(i32::MAX);
57887        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
57888        let e = _mm_set_ps(1., 0., 0., -0.0);
57889        assert_eq_m128(r, e);
57890    }
57891
57892    #[simd_test(enable = "avx512f")]
57893    unsafe fn test_mm_mask_fixupimm_round_ss() {
57894        let a = _mm_set_ps(0., 0., 0., f32::NAN);
57895        let b = _mm_set1_ps(f32::MAX);
57896        let c = _mm_set1_epi32(i32::MAX);
57897        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
57898        let e = _mm_set_ps(0., 0., 0., -0.0);
57899        assert_eq_m128(r, e);
57900    }
57901
57902    #[simd_test(enable = "avx512f")]
57903    unsafe fn test_mm_maskz_fixupimm_round_ss() {
57904        let a = _mm_set_ps(0., 0., 0., f32::NAN);
57905        let b = _mm_set1_ps(f32::MAX);
57906        let c = _mm_set1_epi32(i32::MAX);
57907        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
57908        let e = _mm_set_ps(0., 0., 0., 0.0);
57909        assert_eq_m128(r, e);
57910        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
57911        let e = _mm_set_ps(0., 0., 0., -0.0);
57912        assert_eq_m128(r, e);
57913    }
57914
57915    #[simd_test(enable = "avx512f")]
57916    unsafe fn test_mm_fixupimm_round_sd() {
57917        let a = _mm_set_pd(0., f64::NAN);
57918        let b = _mm_set1_pd(f64::MAX);
57919        let c = _mm_set1_epi64x(i32::MAX as i64);
57920        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
57921        let e = _mm_set_pd(0., -0.0);
57922        assert_eq_m128d(r, e);
57923    }
57924
57925    #[simd_test(enable = "avx512f")]
57926    unsafe fn test_mm_mask_fixupimm_round_sd() {
57927        let a = _mm_set_pd(0., f64::NAN);
57928        let b = _mm_set1_pd(f64::MAX);
57929        let c = _mm_set1_epi64x(i32::MAX as i64);
57930        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
57931        let e = _mm_set_pd(0., -0.0);
57932        assert_eq_m128d(r, e);
57933    }
57934
57935    #[simd_test(enable = "avx512f")]
57936    unsafe fn test_mm_maskz_fixupimm_round_sd() {
57937        let a = _mm_set_pd(0., f64::NAN);
57938        let b = _mm_set1_pd(f64::MAX);
57939        let c = _mm_set1_epi64x(i32::MAX as i64);
57940        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
57941        let e = _mm_set_pd(0., 0.0);
57942        assert_eq_m128d(r, e);
57943        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
57944        let e = _mm_set_pd(0., -0.0);
57945        assert_eq_m128d(r, e);
57946    }
57947
57948    #[simd_test(enable = "avx512f")]
57949    unsafe fn test_mm_mask_cvtss_sd() {
57950        let a = _mm_set_pd(6., -7.5);
57951        let b = _mm_set_ps(0., -0.5, 1., -1.5);
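        // Converts the low f32 of b (-1.5) to f64 when the mask bit is set; otherwise the
        // low lane keeps src. The upper lane always comes from a.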
57952        let r = _mm_mask_cvtss_sd(a, 0, a, b);
57953        assert_eq_m128d(r, a);
57954        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
57955        let e = _mm_set_pd(6., -1.5);
57956        assert_eq_m128d(r, e);
57957    }
57958
57959    #[simd_test(enable = "avx512f")]
57960    unsafe fn test_mm_maskz_cvtss_sd() {
57961        let a = _mm_set_pd(6., -7.5);
57962        let b = _mm_set_ps(0., -0.5, 1., -1.5);
57963        let r = _mm_maskz_cvtss_sd(0, a, b);
57964        let e = _mm_set_pd(6., 0.);
57965        assert_eq_m128d(r, e);
57966        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
57967        let e = _mm_set_pd(6., -1.5);
57968        assert_eq_m128d(r, e);
57969    }
57970
57971    #[simd_test(enable = "avx512f")]
57972    unsafe fn test_mm_mask_cvtsd_ss() {
57973        let a = _mm_set_ps(0., -0.5, 1., -1.5);
57974        let b = _mm_set_pd(6., -7.5);
57975        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
57976        assert_eq_m128(r, a);
57977        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
57978        let e = _mm_set_ps(0., -0.5, 1., -7.5);
57979        assert_eq_m128(r, e);
57980    }
57981
57982    #[simd_test(enable = "avx512f")]
57983    unsafe fn test_mm_maskz_cvtsd_ss() {
57984        let a = _mm_set_ps(0., -0.5, 1., -1.5);
57985        let b = _mm_set_pd(6., -7.5);
57986        let r = _mm_maskz_cvtsd_ss(0, a, b);
57987        let e = _mm_set_ps(0., -0.5, 1., 0.);
57988        assert_eq_m128(r, e);
57989        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
57990        let e = _mm_set_ps(0., -0.5, 1., -7.5);
57991        assert_eq_m128(r, e);
57992    }
57993
57994    #[simd_test(enable = "avx512f")]
57995    unsafe fn test_mm_cvt_roundss_sd() {
57996        let a = _mm_set_pd(6., -7.5);
57997        let b = _mm_set_ps(0., -0.5, 1., -1.5);
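        // The low f32 of b (-1.5) is widened to f64 and placed in the low lane;
        // the upper lane is copied from a.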
57998        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
57999        let e = _mm_set_pd(6., -1.5);
58000        assert_eq_m128d(r, e);
58001    }
58002
58003    #[simd_test(enable = "avx512f")]
58004    unsafe fn test_mm_mask_cvt_roundss_sd() {
58005        let a = _mm_set_pd(6., -7.5);
58006        let b = _mm_set_ps(0., -0.5, 1., -1.5);
58007        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58008        assert_eq_m128d(r, a);
58009        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58010        let e = _mm_set_pd(6., -1.5);
58011        assert_eq_m128d(r, e);
58012    }
58013
58014    #[simd_test(enable = "avx512f")]
58015    unsafe fn test_mm_maskz_cvt_roundss_sd() {
58016        let a = _mm_set_pd(6., -7.5);
58017        let b = _mm_set_ps(0., -0.5, 1., -1.5);
58018        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58019        let e = _mm_set_pd(6., 0.);
58020        assert_eq_m128d(r, e);
58021        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58022        let e = _mm_set_pd(6., -1.5);
58023        assert_eq_m128d(r, e);
58024    }
58025
58026    #[simd_test(enable = "avx512f")]
58027    unsafe fn test_mm_cvt_roundsd_ss() {
58028        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58029        let b = _mm_set_pd(6., -7.5);
58030        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58031        let e = _mm_set_ps(0., -0.5, 1., -7.5);
58032        assert_eq_m128(r, e);
58033    }
58034
58035    #[simd_test(enable = "avx512f")]
58036    unsafe fn test_mm_mask_cvt_roundsd_ss() {
58037        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58038        let b = _mm_set_pd(6., -7.5);
58039        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
58040        assert_eq_m128(r, a);
58041        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58042            a, 0b11111111, a, b,
58043        );
58044        let e = _mm_set_ps(0., -0.5, 1., -7.5);
58045        assert_eq_m128(r, e);
58046    }
58047
58048    #[simd_test(enable = "avx512f")]
58049    unsafe fn test_mm_maskz_cvt_roundsd_ss() {
58050        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58051        let b = _mm_set_pd(6., -7.5);
58052        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58053        let e = _mm_set_ps(0., -0.5, 1., 0.);
58054        assert_eq_m128(r, e);
58055        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58056            0b11111111, a, b,
58057        );
58058        let e = _mm_set_ps(0., -0.5, 1., -7.5);
58059        assert_eq_m128(r, e);
58060    }
58061
58062    #[simd_test(enable = "avx512f")]
58063    unsafe fn test_mm_cvt_roundss_si32() {
58064        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58065        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58066        let e: i32 = -1;
58067        assert_eq!(r, e);
58068    }
58069
58070    #[simd_test(enable = "avx512f")]
58071    unsafe fn test_mm_cvt_roundss_i32() {
58072        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58073        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58074        let e: i32 = -1;
58075        assert_eq!(r, e);
58076    }
58077
58078    #[simd_test(enable = "avx512f")]
58079    unsafe fn test_mm_cvt_roundss_u32() {
58080        let a = _mm_set_ps(0., -0.5, 1., -1.5);
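        // The low lane of a is -1.5; a negative value is out of range for u32, so the
        // conversion yields the integer indefinite value u32::MAX.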
58081        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58082        let e: u32 = u32::MAX;
58083        assert_eq!(r, e);
58084    }
58085
58086    #[simd_test(enable = "avx512f")]
58087    unsafe fn test_mm_cvtss_i32() {
58088        let a = _mm_set_ps(0., -0.5, 1., -1.5);
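        // With the default rounding mode (round to nearest, ties to even), -1.5 converts to -2.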
58089        let r = _mm_cvtss_i32(a);
58090        let e: i32 = -2;
58091        assert_eq!(r, e);
58092    }
58093
58094    #[simd_test(enable = "avx512f")]
58095    unsafe fn test_mm_cvtss_u32() {
58096        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58097        let r = _mm_cvtss_u32(a);
58098        let e: u32 = u32::MAX;
58099        assert_eq!(r, e);
58100    }
58101
58102    #[simd_test(enable = "avx512f")]
58103    unsafe fn test_mm_cvt_roundsd_si32() {
58104        let a = _mm_set_pd(1., -1.5);
58105        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58106        let e: i32 = -1;
58107        assert_eq!(r, e);
58108    }
58109
58110    #[simd_test(enable = "avx512f")]
58111    unsafe fn test_mm_cvt_roundsd_i32() {
58112        let a = _mm_set_pd(1., -1.5);
58113        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58114        let e: i32 = -1;
58115        assert_eq!(r, e);
58116    }
58117
58118    #[simd_test(enable = "avx512f")]
58119    unsafe fn test_mm_cvt_roundsd_u32() {
58120        let a = _mm_set_pd(1., -1.5);
58121        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
58122        let e: u32 = u32::MAX;
58123        assert_eq!(r, e);
58124    }
58125
58126    #[simd_test(enable = "avx512f")]
58127    unsafe fn test_mm_cvtsd_i32() {
58128        let a = _mm_set_pd(1., -1.5);
58129        let r = _mm_cvtsd_i32(a);
58130        let e: i32 = -2;
58131        assert_eq!(r, e);
58132    }
58133
58134    #[simd_test(enable = "avx512f")]
58135    unsafe fn test_mm_cvtsd_u32() {
58136        let a = _mm_set_pd(1., -1.5);
58137        let r = _mm_cvtsd_u32(a);
58138        let e: u32 = u32::MAX;
58139        assert_eq!(r, e);
58140    }
58141
58142    #[simd_test(enable = "avx512f")]
58143    unsafe fn test_mm_cvt_roundi32_ss() {
58144        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58145        let b: i32 = 9;
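        // 9 is exactly representable, so the selected rounding mode does not change the
        // result; it simply replaces the low lane.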
58146        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58147        let e = _mm_set_ps(0., -0.5, 1., 9.);
58148        assert_eq_m128(r, e);
58149    }
58150
58151    #[simd_test(enable = "avx512f")]
58152    unsafe fn test_mm_cvt_roundsi32_ss() {
58153        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58154        let b: i32 = 9;
58155        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58156        let e = _mm_set_ps(0., -0.5, 1., 9.);
58157        assert_eq_m128(r, e);
58158    }
58159
58160    #[simd_test(enable = "avx512f")]
58161    unsafe fn test_mm_cvt_roundu32_ss() {
58162        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58163        let b: u32 = 9;
58164        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58165        let e = _mm_set_ps(0., -0.5, 1., 9.);
58166        assert_eq_m128(r, e);
58167    }
58168
58169    #[simd_test(enable = "avx512f")]
58170    unsafe fn test_mm_cvti32_ss() {
58171        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58172        let b: i32 = 9;
58173        let r = _mm_cvti32_ss(a, b);
58174        let e = _mm_set_ps(0., -0.5, 1., 9.);
58175        assert_eq_m128(r, e);
58176    }
58177
58178    #[simd_test(enable = "avx512f")]
58179    unsafe fn test_mm_cvti32_sd() {
58180        let a = _mm_set_pd(1., -1.5);
58181        let b: i32 = 9;
58182        let r = _mm_cvti32_sd(a, b);
58183        let e = _mm_set_pd(1., 9.);
58184        assert_eq_m128d(r, e);
58185    }
58186
58187    #[simd_test(enable = "avx512f")]
58188    unsafe fn test_mm_cvtt_roundss_si32() {
58189        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58190        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
58191        let e: i32 = -1;
58192        assert_eq!(r, e);
58193    }
58194
58195    #[simd_test(enable = "avx512f")]
58196    unsafe fn test_mm_cvtt_roundss_i32() {
58197        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58198        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
58199        let e: i32 = -1;
58200        assert_eq!(r, e);
58201    }
58202
58203    #[simd_test(enable = "avx512f")]
58204    unsafe fn test_mm_cvtt_roundss_u32() {
58205        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58206        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
58207        let e: u32 = u32::MAX;
58208        assert_eq!(r, e);
58209    }
58210
58211    #[simd_test(enable = "avx512f")]
58212    unsafe fn test_mm_cvttss_i32() {
58213        let a = _mm_set_ps(0., -0.5, 1., -1.5);
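        // The cvtt (truncating) conversions round toward zero, so -1.5 becomes -1.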
58214        let r = _mm_cvttss_i32(a);
58215        let e: i32 = -1;
58216        assert_eq!(r, e);
58217    }
58218
58219    #[simd_test(enable = "avx512f")]
58220    unsafe fn test_mm_cvttss_u32() {
58221        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58222        let r = _mm_cvttss_u32(a);
58223        let e: u32 = u32::MAX;
58224        assert_eq!(r, e);
58225    }
58226
58227    #[simd_test(enable = "avx512f")]
58228    unsafe fn test_mm_cvtt_roundsd_si32() {
58229        let a = _mm_set_pd(1., -1.5);
58230        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
58231        let e: i32 = -1;
58232        assert_eq!(r, e);
58233    }
58234
58235    #[simd_test(enable = "avx512f")]
58236    unsafe fn test_mm_cvtt_roundsd_i32() {
58237        let a = _mm_set_pd(1., -1.5);
58238        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
58239        let e: i32 = -1;
58240        assert_eq!(r, e);
58241    }
58242
58243    #[simd_test(enable = "avx512f")]
58244    unsafe fn test_mm_cvtt_roundsd_u32() {
58245        let a = _mm_set_pd(1., -1.5);
58246        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
58247        let e: u32 = u32::MAX;
58248        assert_eq!(r, e);
58249    }
58250
58251    #[simd_test(enable = "avx512f")]
58252    unsafe fn test_mm_cvttsd_i32() {
58253        let a = _mm_set_pd(1., -1.5);
58254        let r = _mm_cvttsd_i32(a);
58255        let e: i32 = -1;
58256        assert_eq!(r, e);
58257    }
58258
58259    #[simd_test(enable = "avx512f")]
58260    unsafe fn test_mm_cvttsd_u32() {
58261        let a = _mm_set_pd(1., -1.5);
58262        let r = _mm_cvttsd_u32(a);
58263        let e: u32 = u32::MAX;
58264        assert_eq!(r, e);
58265    }
58266
58267    #[simd_test(enable = "avx512f")]
58268    unsafe fn test_mm_cvtu32_ss() {
58269        let a = _mm_set_ps(0., -0.5, 1., -1.5);
58270        let b: u32 = 9;
58271        let r = _mm_cvtu32_ss(a, b);
58272        let e = _mm_set_ps(0., -0.5, 1., 9.);
58273        assert_eq_m128(r, e);
58274    }
58275
58276    #[simd_test(enable = "avx512f")]
58277    unsafe fn test_mm_cvtu32_sd() {
58278        let a = _mm_set_pd(1., -1.5);
58279        let b: u32 = 9;
58280        let r = _mm_cvtu32_sd(a, b);
58281        let e = _mm_set_pd(1., 9.);
58282        assert_eq_m128d(r, e);
58283    }
58284
58285    #[simd_test(enable = "avx512f")]
58286    unsafe fn test_mm_comi_round_ss() {
58287        let a = _mm_set1_ps(2.2);
58288        let b = _mm_set1_ps(1.1);
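        // Predicate 0 is _CMP_EQ_OQ; 2.2 == 1.1 is false, so the comparison returns 0.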
58289        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
58290        let e: i32 = 0;
58291        assert_eq!(r, e);
58292    }
58293
58294    #[simd_test(enable = "avx512f")]
58295    unsafe fn test_mm_comi_round_sd() {
58296        let a = _mm_set1_pd(2.2);
58297        let b = _mm_set1_pd(1.1);
58298        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
58299        let e: i32 = 0;
58300        assert_eq!(r, e);
58301    }
58302
58303    #[simd_test(enable = "avx512f")]
58304    unsafe fn test_mm512_cvtsi512_si32() {
58305        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
58306        let r = _mm512_cvtsi512_si32(a);
58307        let e: i32 = 1;
58308        assert_eq!(r, e);
58309    }
58310
58311    #[simd_test(enable = "avx512f")]
58312    unsafe fn test_mm512_cvtss_f32() {
58313        let a = _mm512_setr_ps(
58314            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
58315        );
58316        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
58317    }
58318
58319    #[simd_test(enable = "avx512f")]
58320    unsafe fn test_mm512_cvtsd_f64() {
58321        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
58322        assert_eq!(r, -1.1);
58323    }
58324
58325    #[simd_test(enable = "avx512f")]
58326    unsafe fn test_mm512_shuffle_pd() {
58327        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
58328        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
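        // With every control bit set, each 128-bit lane picks the high element of a
        // followed by the high element of b: (4., 3., 8., 7., ...).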
58329        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
58330        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
58331        assert_eq_m512d(r, e);
58332    }
58333
58334    #[simd_test(enable = "avx512f")]
58335    unsafe fn test_mm512_mask_shuffle_pd() {
58336        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
58337        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
58338        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
58339        assert_eq_m512d(r, a);
58340        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
58341        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
58342        assert_eq_m512d(r, e);
58343    }
58344
58345    #[simd_test(enable = "avx512f")]
58346    unsafe fn test_mm512_maskz_shuffle_pd() {
58347        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
58348        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
58349        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
58350        assert_eq_m512d(r, _mm512_setzero_pd());
58351        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
58352        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
58353        assert_eq_m512d(r, e);
58354    }
58355
58356    #[simd_test(enable = "avx512f")]
58357    unsafe fn test_mm512_mask_expandloadu_epi32() {
58358        let src = _mm512_set1_epi32(42);
58359        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
58360        let p = a.as_ptr();
58361        let m = 0b11101000_11001010;
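        // Expand-load writes consecutive elements from memory only into lanes whose mask
        // bit is set; lanes with a clear bit keep the value from src (42).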
58362        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
58363        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
58364        assert_eq_m512i(r, e);
58365    }
58366
58367    #[simd_test(enable = "avx512f")]
58368    unsafe fn test_mm512_maskz_expandloadu_epi32() {
58369        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
58370        let p = a.as_ptr();
58371        let m = 0b11101000_11001010;
58372        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
58373        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
58374        assert_eq_m512i(r, e);
58375    }
58376
58377    #[simd_test(enable = "avx512f,avx512vl")]
58378    unsafe fn test_mm256_mask_expandloadu_epi32() {
58379        let src = _mm256_set1_epi32(42);
58380        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
58381        let p = a.as_ptr();
58382        let m = 0b11101000;
58383        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
58384        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
58385        assert_eq_m256i(r, e);
58386    }
58387
58388    #[simd_test(enable = "avx512f,avx512vl")]
58389    unsafe fn test_mm256_maskz_expandloadu_epi32() {
58390        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
58391        let p = a.as_ptr();
58392        let m = 0b11101000;
58393        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
58394        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
58395        assert_eq_m256i(r, e);
58396    }
58397
58398    #[simd_test(enable = "avx512f,avx512vl")]
58399    unsafe fn test_mm_mask_expandloadu_epi32() {
58400        let src = _mm_set1_epi32(42);
58401        let a = &[1_i32, 2, 3, 4];
58402        let p = a.as_ptr();
58403        let m = 0b11111000;
58404        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
58405        let e = _mm_set_epi32(1, 42, 42, 42);
58406        assert_eq_m128i(r, e);
58407    }
58408
58409    #[simd_test(enable = "avx512f,avx512vl")]
58410    unsafe fn test_mm_maskz_expandloadu_epi32() {
58411        let a = &[1_i32, 2, 3, 4];
58412        let p = a.as_ptr();
58413        let m = 0b11111000;
58414        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
58415        let e = _mm_set_epi32(1, 0, 0, 0);
58416        assert_eq_m128i(r, e);
58417    }
58418
58419    #[simd_test(enable = "avx512f")]
58420    unsafe fn test_mm512_mask_expandloadu_epi64() {
58421        let src = _mm512_set1_epi64(42);
58422        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
58423        let p = a.as_ptr();
58424        let m = 0b11101000;
58425        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
58426        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
58427        assert_eq_m512i(r, e);
58428    }
58429
58430    #[simd_test(enable = "avx512f")]
58431    unsafe fn test_mm512_maskz_expandloadu_epi64() {
58432        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
58433        let p = a.as_ptr();
58434        let m = 0b11101000;
58435        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
58436        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
58437        assert_eq_m512i(r, e);
58438    }
58439
58440    #[simd_test(enable = "avx512f,avx512vl")]
58441    unsafe fn test_mm256_mask_expandloadu_epi64() {
58442        let src = _mm256_set1_epi64x(42);
58443        let a = &[1_i64, 2, 3, 4];
58444        let p = a.as_ptr();
58445        let m = 0b11101000;
58446        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
58447        let e = _mm256_set_epi64x(1, 42, 42, 42);
58448        assert_eq_m256i(r, e);
58449    }
58450
58451    #[simd_test(enable = "avx512f,avx512vl")]
58452    unsafe fn test_mm256_maskz_expandloadu_epi64() {
58453        let a = &[1_i64, 2, 3, 4];
58454        let p = a.as_ptr();
58455        let m = 0b11101000;
58456        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
58457        let e = _mm256_set_epi64x(1, 0, 0, 0);
58458        assert_eq_m256i(r, e);
58459    }
58460
58461    #[simd_test(enable = "avx512f,avx512vl")]
58462    unsafe fn test_mm_mask_expandloadu_epi64() {
58463        let src = _mm_set1_epi64x(42);
58464        let a = &[1_i64, 2];
58465        let p = a.as_ptr();
58466        let m = 0b11101000;
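        // Only the two low mask bits apply to a 128-bit vector of i64; both are clear in
        // 0b11101000, so both lanes keep src (42).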
58467        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
58468        let e = _mm_set_epi64x(42, 42);
58469        assert_eq_m128i(r, e);
58470    }
58471
58472    #[simd_test(enable = "avx512f,avx512vl")]
58473    unsafe fn test_mm_maskz_expandloadu_epi64() {
58474        let a = &[1_i64, 2];
58475        let p = a.as_ptr();
58476        let m = 0b11101000;
58477        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
58478        let e = _mm_set_epi64x(0, 0);
58479        assert_eq_m128i(r, e);
58480    }
58481
58482    #[simd_test(enable = "avx512f")]
58483    unsafe fn test_mm512_mask_expandloadu_ps() {
58484        let src = _mm512_set1_ps(42.);
58485        let a = &[
58486            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
58487        ];
58488        let p = a.as_ptr();
58489        let m = 0b11101000_11001010;
58490        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
58491        let e = _mm512_set_ps(
58492            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
58493        );
58494        assert_eq_m512(r, e);
58495    }
58496
58497    #[simd_test(enable = "avx512f")]
58498    unsafe fn test_mm512_maskz_expandloadu_ps() {
58499        let a = &[
58500            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
58501        ];
58502        let p = a.as_ptr();
58503        let m = 0b11101000_11001010;
58504        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
58505        let e = _mm512_set_ps(
58506            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
58507        );
58508        assert_eq_m512(r, e);
58509    }
58510
58511    #[simd_test(enable = "avx512f,avx512vl")]
58512    unsafe fn test_mm256_mask_expandloadu_ps() {
58513        let src = _mm256_set1_ps(42.);
58514        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
58515        let p = a.as_ptr();
58516        let m = 0b11101000;
58517        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
58518        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
58519        assert_eq_m256(r, e);
58520    }
58521
58522    #[simd_test(enable = "avx512f,avx512vl")]
58523    unsafe fn test_mm256_maskz_expandloadu_ps() {
58524        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
58525        let p = a.as_ptr();
58526        let m = 0b11101000;
58527        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
58528        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
58529        assert_eq_m256(r, e);
58530    }
58531
58532    #[simd_test(enable = "avx512f,avx512vl")]
58533    unsafe fn test_mm_mask_expandloadu_ps() {
58534        let src = _mm_set1_ps(42.);
58535        let a = &[1.0f32, 2., 3., 4.];
58536        let p = a.as_ptr();
58537        let m = 0b11101000;
58538        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
58539        let e = _mm_set_ps(1., 42., 42., 42.);
58540        assert_eq_m128(r, e);
58541    }
58542
58543    #[simd_test(enable = "avx512f,avx512vl")]
58544    unsafe fn test_mm_maskz_expandloadu_ps() {
58545        let a = &[1.0f32, 2., 3., 4.];
58546        let p = a.as_ptr();
58547        let m = 0b11101000;
58548        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
58549        let e = _mm_set_ps(1., 0., 0., 0.);
58550        assert_eq_m128(r, e);
58551    }
58552
58553    #[simd_test(enable = "avx512f")]
58554    unsafe fn test_mm512_mask_expandloadu_pd() {
58555        let src = _mm512_set1_pd(42.);
58556        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
58557        let p = a.as_ptr();
58558        let m = 0b11101000;
58559        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
58560        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
58561        assert_eq_m512d(r, e);
58562    }
58563
58564    #[simd_test(enable = "avx512f")]
58565    unsafe fn test_mm512_maskz_expandloadu_pd() {
58566        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
58567        let p = a.as_ptr();
58568        let m = 0b11101000;
58569        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
58570        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
58571        assert_eq_m512d(r, e);
58572    }
58573
58574    #[simd_test(enable = "avx512f,avx512vl")]
58575    unsafe fn test_mm256_mask_expandloadu_pd() {
58576        let src = _mm256_set1_pd(42.);
58577        let a = &[1.0f64, 2., 3., 4.];
58578        let p = a.as_ptr();
58579        let m = 0b11101000;
58580        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
58581        let e = _mm256_set_pd(1., 42., 42., 42.);
58582        assert_eq_m256d(r, e);
58583    }
58584
58585    #[simd_test(enable = "avx512f,avx512vl")]
58586    unsafe fn test_mm256_maskz_expandloadu_pd() {
58587        let a = &[1.0f64, 2., 3., 4.];
58588        let p = a.as_ptr();
58589        let m = 0b11101000;
58590        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
58591        let e = _mm256_set_pd(1., 0., 0., 0.);
58592        assert_eq_m256d(r, e);
58593    }
58594
58595    #[simd_test(enable = "avx512f,avx512vl")]
58596    unsafe fn test_mm_mask_expandloadu_pd() {
58597        let src = _mm_set1_pd(42.);
58598        let a = &[1.0f64, 2.];
58599        let p = a.as_ptr();
58600        let m = 0b11101000;
58601        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
58602        let e = _mm_set_pd(42., 42.);
58603        assert_eq_m128d(r, e);
58604    }
58605
58606    #[simd_test(enable = "avx512f,avx512vl")]
58607    unsafe fn test_mm_maskz_expandloadu_pd() {
58608        let a = &[1.0f64, 2.];
58609        let p = a.as_ptr();
58610        let m = 0b11101000;
58611        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
58612        let e = _mm_set_pd(0., 0.);
58613        assert_eq_m128d(r, e);
58614    }
58615}