use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_add(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    simd_add(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_and(a, b))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_and(a, b))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_or(a, b))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_or(a, b))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b1,
            ((MASK as u32 >> 1) & 0b1) + 4,
            ((MASK as u32 >> 2) & 0b1) + 2,
            ((MASK as u32 >> 3) & 0b1) + 6,
        ],
    )
}
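
// In the `simd_shuffle!` index space above, 0..=3 select lanes of `a` and
// 4..=7 select lanes of `b`, so `_mm256_shuffle_pd` returns
// `[a[m0], b[m1], a[2 + m2], b[2 + m3]]` where `mN` is bit N of `MASK`.
// For example, `MASK = 0b0101` yields `[a[1], b[0], a[3], b[2]]`.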

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11) + 8,
            ((MASK as u32 >> 6) & 0b11) + 8,
            (MASK as u32 & 0b11) + 4,
            ((MASK as u32 >> 2) & 0b11) + 4,
            ((MASK as u32 >> 4) & 0b11) + 12,
            ((MASK as u32 >> 6) & 0b11) + 12,
        ],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
}
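
// Note that both `andnot` intrinsics invert the *first* operand, computing
// `!a & b`; the NOT is spelled as an XOR against an all-ones splat because
// the portable simd intrinsics have no dedicated NOT operation.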

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    vmaxpd(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    vmaxps(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    vminpd(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    vminps(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_mul(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    simd_mul(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    let a = a.as_f64x4();
    let b = b.as_f64x4();
    let add = simd_add(a, b);
    let sub = simd_sub(a, b);
    simd_shuffle!(add, sub, [4, 1, 6, 3])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    let a = a.as_f32x8();
    let b = b.as_f32x8();
    let add = simd_add(a, b);
    let sub = simd_sub(a, b);
    simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
}
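
// In both `addsub` variants the final shuffle takes even-indexed lanes from
// the subtraction and odd-indexed lanes from the addition, so e.g.
// `_mm256_addsub_pd` returns `[a0 - b0, a1 + b1, a2 - b2, a3 + b3]`.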

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_sub(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    simd_sub(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    simd_div(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_div(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundpd256(a, ROUNDING)
}
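
// `ROUNDING` uses the SSE4.1 `_MM_FROUND_*` encoding: 0x00 = round to
// nearest even, 0x01 = round down, 0x02 = round up, 0x03 = truncate, and
// 0x04 = use the current MXCSR rounding mode; OR in `_MM_FROUND_NO_EXC`
// (0x08) to suppress precision exceptions. E.g. `_mm256_round_pd::<0x03>(a)`
// truncates each lane toward zero.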

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
    simd_ceil(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
    simd_floor(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundps256(a, ROUNDING)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
    simd_ceil(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
    simd_floor(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
    simd_fsqrt(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
    simd_fsqrt(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    simd_shuffle!(
        a,
        b,
        [
            ((IMM4 as u32 >> 0) & 1) * 4 + 0,
            ((IMM4 as u32 >> 1) & 1) * 4 + 1,
            ((IMM4 as u32 >> 2) & 1) * 4 + 2,
            ((IMM4 as u32 >> 3) & 1) * 4 + 3,
        ],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        b,
        [
            ((IMM8 as u32 >> 0) & 1) * 8 + 0,
            ((IMM8 as u32 >> 1) & 1) * 8 + 1,
            ((IMM8 as u32 >> 2) & 1) * 8 + 2,
            ((IMM8 as u32 >> 3) & 1) * 8 + 3,
            ((IMM8 as u32 >> 4) & 1) * 8 + 4,
            ((IMM8 as u32 >> 5) & 1) * 8 + 5,
            ((IMM8 as u32 >> 6) & 1) * 8 + 6,
            ((IMM8 as u32 >> 7) & 1) * 8 + 7,
        ],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
    transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
    transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
}
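
// Both `blendv` intrinsics select purely on the sign bit of each lane of the
// mask `c` (hence the `simd_lt` against zero over the signed integer view):
// a set sign bit takes the lane from `b`, a clear one takes it from `a`.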

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vdpps(a, b, IMM8)
}
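
// For `_mm256_dp_ps` the high nibble of `IMM8` selects which lanes enter
// each dot product and the low nibble selects which result lanes receive it;
// the two 128-bit halves are processed independently. E.g. `IMM8 = 0xFF`
// computes a full 4-element dot product in each half and broadcasts it to
// all four lanes of that half.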

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    vhaddpd(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    vhaddps(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    vhsubpd(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    vhsubps(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_xor(a, b))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_xor(a, b))
}

#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OQ: i32 = 0x00;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OS: i32 = 0x01;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OS: i32 = 0x02;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_Q: i32 = 0x03;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_UQ: i32 = 0x04;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_US: i32 = 0x05;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_US: i32 = 0x06;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_Q: i32 = 0x07;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_UQ: i32 = 0x08;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_US: i32 = 0x09;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_US: i32 = 0x0a;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OQ: i32 = 0x0b;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OQ: i32 = 0x0c;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OS: i32 = 0x0d;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OS: i32 = 0x0e;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_UQ: i32 = 0x0f;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OS: i32 = 0x10;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OQ: i32 = 0x11;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OQ: i32 = 0x12;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_S: i32 = 0x13;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_US: i32 = 0x14;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_UQ: i32 = 0x15;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_UQ: i32 = 0x16;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_S: i32 = 0x17;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_US: i32 = 0x18;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_UQ: i32 = 0x19;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_UQ: i32 = 0x1a;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OS: i32 = 0x1b;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OS: i32 = 0x1c;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OQ: i32 = 0x1d;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OQ: i32 = 0x1e;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_US: i32 = 0x1f;
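
// In the predicate names above, `O`/`U` distinguish ordered vs. unordered
// comparisons (an unordered predicate is true when either operand is NaN),
// and `Q`/`S` distinguish quiet vs. signaling behavior on QNaN inputs.
// A minimal usage sketch (illustrative only):
//
//     let lt = _mm256_cmp_pd::<_CMP_LT_OQ>(a, b); // all-ones where a < b
//     let any_lt = _mm256_movemask_pd(lt) != 0;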

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd(a, b, const { IMM5 as i8 })
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd256(a, b, IMM5 as u8)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps(a, b, const { IMM5 as i8 })
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps256(a, b, const { IMM5 as u8 })
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpsd(a, b, IMM5 as i8)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpss(a, b, IMM5 as i8)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
    simd_cast(a.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
    simd_cast(a.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
    simd_cast(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    transmute(vcvtps2dq(a))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
    simd_cast(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
    simd_extract!(a, 0)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvttpd2dq(a))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvtpd2dq(a))
}
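
// The two conversions above differ only in rounding: `_mm256_cvttpd_epi32`
// (note the extra `t`) always truncates toward zero, while
// `_mm256_cvtpd_epi32` rounds according to the current MXCSR mode
// (round-to-nearest-even by default).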

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    transmute(vcvttps2dq(a))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
    transmute(dst)
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
    static_assert_uimm_bits!(INDEX, 3);
    simd_extract!(a.as_i32x8(), INDEX as u32)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
    simd_extract!(a.as_i32x8(), 0)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroall() {
    vzeroall()
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroupper() {
    vzeroupper()
}
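
// `_mm256_zeroupper` clears the upper 128 bits of every YMM register. Its
// purpose is to avoid the SSE/AVX transition penalty before executing legacy
// SSE code; compilers generally insert it automatically around ABI
// boundaries, so explicit use is rarely needed in Rust.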

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
    vpermilps256(a, b.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
    vpermilps(a, b.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            ((IMM8 as u32 >> 0) & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        _mm_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
    vpermilpd256(a, b.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
    vpermilpd(a, b.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    simd_shuffle!(
        a,
        _mm256_undefined_pd(),
        [
            ((IMM4 as u32 >> 0) & 1),
            ((IMM4 as u32 >> 1) & 1),
            ((IMM4 as u32 >> 2) & 1) + 2,
            ((IMM4 as u32 >> 3) & 1) + 2,
        ],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    simd_shuffle!(
        a,
        _mm_undefined_pd(),
        [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128ps256(a, b, IMM8 as i8)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128pd256(a, b, IMM8 as i8)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
    simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
    simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(
        a,
        _mm256_castps128_ps256(b),
        [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(
        a,
        _mm256_castpd128_pd256(b),
        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_env = "msvc")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    let dst: i64x4 = simd_shuffle!(
        a.as_i64x4(),
        _mm256_castsi128_si256(b).as_i64x4(),
        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
    );
    transmute(dst)
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
    static_assert_uimm_bits!(INDEX, 5);
    transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i))
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
    static_assert_uimm_bits!(INDEX, 4);
    transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i))
}

#[inline]
#[target_feature(enable = "avx")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
    static_assert_uimm_bits!(INDEX, 3);
    transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
    *(mem_addr as *const __m256d)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
    *(mem_addr as *mut __m256d) = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
    *(mem_addr as *const __m256)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
    *(mem_addr as *mut __m256) = a;
}
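
// The aligned `load`/`store` intrinsics above require `mem_addr` to be
// 32-byte aligned; an unaligned address is undefined behavior (in practice a
// general-protection fault). The `loadu`/`storeu` variants below accept any
// alignment.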

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
    let mut dst = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256d>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
    let mut dst = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
    *mem_addr
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
    *mem_addr = a;
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
    let mut dst = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256i>(),
    );
    dst
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
    simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
    simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
    simd_shuffle!(a, a, [0, 0, 2, 2])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vlddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(ymm_reg) a,
        options(nostack, preserves_flags),
    );
}
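
// The three `stream` intrinsics above perform non-temporal stores that
// bypass the cache and are weakly ordered, so synchronizing with other
// threads requires a fence (e.g. `_mm_sfence`) after the stores; `mem_addr`
// must be 32-byte aligned.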

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrcpps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
    vrcpps(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
    vrsqrtps(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_shuffle!(a, b, [1, 5, 3, 7])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
    simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_shuffle!(a, b, [0, 4, 2, 6])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
    simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    ptestz256(a.as_i64x4(), b.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestc256(a.as_i64x4(), b.as_i64x4())
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestnzc256(a.as_i64x4(), b.as_i64x4())
}
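
// The `vptest`-based predicates above map to the flags the instruction
// sets: `testz` returns 1 iff `a & b` is all zeros (ZF), `testc` returns 1
// iff `!a & b` is all zeros (CF), and `testnzc` returns 1 iff both of those
// flags would be clear. The `vtestpd`/`vtestps` variants below apply the
// same logic to the sign bits only.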

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
    vtestzpd256(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
    vtestcpd256(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
    vtestnzcpd256(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
    vtestzpd(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
    vtestcpd(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
    vtestnzcpd(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
    vtestzps256(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
    vtestcps256(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
    vtestnzcps256(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
    vtestzps(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
    vtestcps(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vtestps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
    vtestnzcps(a, b)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
    let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
    simd_bitmask::<i64x4, u8>(mask).into()
}
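
// `movemask` packs the sign bit of every lane into the low bits of the
// return value (bit i = sign bit of lane i), so the result fits in 4 bits
// for the `pd` variant and 8 bits for the `ps` variant below.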

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
    let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
    simd_bitmask::<i32x8, u8>(mask).into()
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_pd() -> __m256d {
    const { mem::zeroed() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_ps() -> __m256 {
    const { mem::zeroed() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_si256() -> __m256i {
    const { mem::zeroed() }
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    _mm256_setr_pd(d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_ps(
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
    g: f32,
    h: f32,
) -> __m256 {
    _mm256_setr_ps(h, g, f, e, d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi8(
    e00: i8,
    e01: i8,
    e02: i8,
    e03: i8,
    e04: i8,
    e05: i8,
    e06: i8,
    e07: i8,
    e08: i8,
    e09: i8,
    e10: i8,
    e11: i8,
    e12: i8,
    e13: i8,
    e14: i8,
    e15: i8,
    e16: i8,
    e17: i8,
    e18: i8,
    e19: i8,
    e20: i8,
    e21: i8,
    e22: i8,
    e23: i8,
    e24: i8,
    e25: i8,
    e26: i8,
    e27: i8,
    e28: i8,
    e29: i8,
    e30: i8,
    e31: i8,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e09, e08,
        e07, e06, e05, e04, e03, e02, e01, e00,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi16(
    e00: i16,
    e01: i16,
    e02: i16,
    e03: i16,
    e04: i16,
    e05: i16,
    e06: i16,
    e07: i16,
    e08: i16,
    e09: i16,
    e10: i16,
    e11: i16,
    e12: i16,
    e13: i16,
    e14: i16,
    e15: i16,
) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi16(
        e15, e14, e13, e12,
        e11, e10, e09, e08,
        e07, e06, e05, e04,
        e03, e02, e01, e00,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi32(
    e0: i32,
    e1: i32,
    e2: i32,
    e3: i32,
    e4: i32,
    e5: i32,
    e6: i32,
    e7: i32,
) -> __m256i {
    _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    _mm256_setr_epi64x(d, c, b, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    __m256d([a, b, c, d])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_ps(
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
    g: f32,
    h: f32,
) -> __m256 {
    __m256([a, b, c, d, e, f, g, h])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi8(
    e00: i8,
    e01: i8,
    e02: i8,
    e03: i8,
    e04: i8,
    e05: i8,
    e06: i8,
    e07: i8,
    e08: i8,
    e09: i8,
    e10: i8,
    e11: i8,
    e12: i8,
    e13: i8,
    e14: i8,
    e15: i8,
    e16: i8,
    e17: i8,
    e18: i8,
    e19: i8,
    e20: i8,
    e21: i8,
    e22: i8,
    e23: i8,
    e24: i8,
    e25: i8,
    e26: i8,
    e27: i8,
    e28: i8,
    e29: i8,
    e30: i8,
    e31: i8,
) -> __m256i {
    #[rustfmt::skip]
    transmute(i8x32::new(
        e00, e01, e02, e03, e04, e05, e06, e07,
        e08, e09, e10, e11, e12, e13, e14, e15,
        e16, e17, e18, e19, e20, e21, e22, e23,
        e24, e25, e26, e27, e28, e29, e30, e31,
    ))
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi16(
    e00: i16,
    e01: i16,
    e02: i16,
    e03: i16,
    e04: i16,
    e05: i16,
    e06: i16,
    e07: i16,
    e08: i16,
    e09: i16,
    e10: i16,
    e11: i16,
    e12: i16,
    e13: i16,
    e14: i16,
    e15: i16,
) -> __m256i {
    #[rustfmt::skip]
    transmute(i16x16::new(
        e00, e01, e02, e03,
        e04, e05, e06, e07,
        e08, e09, e10, e11,
        e12, e13, e14, e15,
    ))
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi32(
    e0: i32,
    e1: i32,
    e2: i32,
    e3: i32,
    e4: i32,
    e5: i32,
    e6: i32,
    e7: i32,
) -> __m256i {
    transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    transmute(i64x4::new(a, b, c, d))
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
    _mm256_setr_pd(a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
    _mm256_setr_ps(a, a, a, a, a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
    )
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vinsertf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
    _mm256_setr_epi64x(a, a, a, a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
    transmute(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
    transmute(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
    transmute(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
    transmute(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
    transmute(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
    transmute(a)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
    simd_shuffle!(a, a, [0, 1, 2, 3])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
    simd_shuffle!(a, a, [0, 1])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
    let a = a.as_i64x4();
    let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(dst)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
    simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
    simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
    let a = a.as_i64x2();
    let undefined = i64x2::ZERO;
    let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
    transmute(dst)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
    simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
    let b = i64x2::ZERO;
    let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
    transmute(dst)
}

#[inline]
#[target_feature(enable = "avx")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
    simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3])
}
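
// The three `zext` intrinsics above differ from the `cast` widenings before
// them: casting leaves the upper 128 bits undefined, while `zext` shuffles
// against an explicitly zeroed vector, guaranteeing a zeroed upper half.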
2770
2771#[inline]
2777#[target_feature(enable = "avx")]
2778#[stable(feature = "simd_x86", since = "1.27.0")]
2780pub unsafe fn _mm256_undefined_ps() -> __m256 {
2781 const { mem::zeroed() }
2782}
2783
2784#[inline]
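2785/// Returns vector of type `__m256d` with indeterminate elements.
2786/// Despite the "undefined" name, this is not equivalent to [`mem::MaybeUninit`];
2787/// in practice, this is typically equivalent to [`mem::zeroed`].
2788///
2789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd)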
2790#[target_feature(enable = "avx")]
2791#[stable(feature = "simd_x86", since = "1.27.0")]
2793pub unsafe fn _mm256_undefined_pd() -> __m256d {
2794 const { mem::zeroed() }
2795}
2796
2797#[inline]
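2798/// Returns vector of type `__m256i` with indeterminate elements.
2799/// Despite the "undefined" name, this is not equivalent to [`mem::MaybeUninit`];
2800/// in practice, this is typically equivalent to [`mem::zeroed`].
2801///
2802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256)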
2803#[target_feature(enable = "avx")]
2804#[stable(feature = "simd_x86", since = "1.27.0")]
2806pub unsafe fn _mm256_undefined_si256() -> __m256i {
2807 const { mem::zeroed() }
2808}
2809
2810#[inline]
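2811/// Sets packed `__m256` returned vector with the supplied values.
2812///
2813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128)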
2814#[target_feature(enable = "avx")]
2815#[cfg_attr(test, assert_instr(vinsertf128))]
2816#[stable(feature = "simd_x86", since = "1.27.0")]
2817pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2818 simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
2819}
2820
2821#[inline]
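2822/// Sets packed `__m256d` returned vector with the supplied values.
2823///
2824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d)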
2825#[target_feature(enable = "avx")]
2826#[cfg_attr(test, assert_instr(vinsertf128))]
2827#[stable(feature = "simd_x86", since = "1.27.0")]
2828pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
2829 let hi: __m128 = transmute(hi);
2830 let lo: __m128 = transmute(lo);
2831 transmute(_mm256_set_m128(hi, lo))
2832}
2833
2834#[inline]
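2835/// Sets packed `__m256i` returned vector with the supplied values.
2836///
2837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)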
2838#[target_feature(enable = "avx")]
2839#[cfg_attr(test, assert_instr(vinsertf128))]
2840#[stable(feature = "simd_x86", since = "1.27.0")]
2841pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
2842 let hi: __m128 = transmute(hi);
2843 let lo: __m128 = transmute(lo);
2844 transmute(_mm256_set_m128(hi, lo))
2845}
2846
2847#[inline]
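2848/// Sets packed `__m256` returned vector with the supplied values.
2849///
2850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128)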
2851#[target_feature(enable = "avx")]
2852#[cfg_attr(test, assert_instr(vinsertf128))]
2853#[stable(feature = "simd_x86", since = "1.27.0")]
2854pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2855 _mm256_set_m128(hi, lo)
2856}
2857
2858#[inline]
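2859/// Sets packed `__m256d` returned vector with the supplied values.
2860///
2861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d)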
2862#[target_feature(enable = "avx")]
2863#[cfg_attr(test, assert_instr(vinsertf128))]
2864#[stable(feature = "simd_x86", since = "1.27.0")]
2865pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2866 _mm256_set_m128d(hi, lo)
2867}
2868
2869#[inline]
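2870/// Sets packed `__m256i` returned vector with the supplied values.
2871///
2872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i)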
2873#[target_feature(enable = "avx")]
2874#[cfg_attr(test, assert_instr(vinsertf128))]
2875#[stable(feature = "simd_x86", since = "1.27.0")]
2876pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
2877 _mm256_set_m128i(hi, lo)
2878}
2879
2880#[inline]
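2881/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit)
2882/// floating-point elements) from memory, and combines them into a 256-bit
2883/// value.
2884/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2885///
2886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)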
2887#[target_feature(enable = "avx")]
2888#[stable(feature = "simd_x86", since = "1.27.0")]
2890pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
2891 let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
2892 _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
2893}
2894
2895#[inline]
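2896/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
2897/// floating-point elements) from memory, and combines them into a 256-bit
2898/// value.
2899/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2900///
2901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)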
2902#[target_feature(enable = "avx")]
2903#[stable(feature = "simd_x86", since = "1.27.0")]
2905pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
2906 let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
2907 _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
2908}
2909
2910#[inline]
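2911/// Loads two 128-bit values (composed of integer data) from memory, and
2912/// combines them into a 256-bit value.
2913/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2914///
2915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)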
2916#[target_feature(enable = "avx")]
2917#[stable(feature = "simd_x86", since = "1.27.0")]
2919pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
2920 let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
2921 _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
2922}
2923
2924#[inline]
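2925/// Stores the high and low 128-bit halves (each composed of 4 packed
2926/// single-precision (32-bit) floating-point elements) from `a` into memory
2927/// two different 128-bit locations.
2928/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2929///
2930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)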
2931#[target_feature(enable = "avx")]
2932#[stable(feature = "simd_x86", since = "1.27.0")]
2934pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
2935 let lo = _mm256_castps256_ps128(a);
2936 _mm_storeu_ps(loaddr, lo);
2937 let hi = _mm256_extractf128_ps::<1>(a);
2938 _mm_storeu_ps(hiaddr, hi);
2939}
2940
2941#[inline]
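2942/// Stores the high and low 128-bit halves (each composed of 2 packed
2943/// double-precision (64-bit) floating-point elements) from `a` into memory
2944/// two different 128-bit locations.
2945/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2946///
2947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)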
2948#[target_feature(enable = "avx")]
2949#[stable(feature = "simd_x86", since = "1.27.0")]
2951pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
2952 let lo = _mm256_castpd256_pd128(a);
2953 _mm_storeu_pd(loaddr, lo);
2954 let hi = _mm256_extractf128_pd::<1>(a);
2955 _mm_storeu_pd(hiaddr, hi);
2956}
2957
2958#[inline]
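2959/// Stores the high and low 128-bit halves (each composed of integer data)
2960/// from `a` into memory two different 128-bit locations.
2961/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2962///
2963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)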
2964#[target_feature(enable = "avx")]
2965#[stable(feature = "simd_x86", since = "1.27.0")]
2967pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
2968 let lo = _mm256_castsi256_si128(a);
2969 _mm_storeu_si128(loaddr, lo);
2970 let hi = _mm256_extractf128_si256::<1>(a);
2971 _mm_storeu_si128(hiaddr, hi);
2972}
2973
2974#[inline]
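2975/// Returns the first element of the input vector of `[8 x float]`.
2976///
2977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32)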
2978#[target_feature(enable = "avx")]
2979#[stable(feature = "simd_x86", since = "1.27.0")]
2981pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
2982 simd_extract!(a, 0)
2983}
2984
2985#[allow(improper_ctypes)]
2987extern "C" {
2988 #[link_name = "llvm.x86.avx.round.pd.256"]
2989 fn roundpd256(a: __m256d, b: i32) -> __m256d;
2990 #[link_name = "llvm.x86.avx.round.ps.256"]
2991 fn roundps256(a: __m256, b: i32) -> __m256;
2992 #[link_name = "llvm.x86.avx.dp.ps.256"]
2993 fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
2994 #[link_name = "llvm.x86.avx.hadd.pd.256"]
2995 fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
2996 #[link_name = "llvm.x86.avx.hadd.ps.256"]
2997 fn vhaddps(a: __m256, b: __m256) -> __m256;
2998 #[link_name = "llvm.x86.avx.hsub.pd.256"]
2999 fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
3000 #[link_name = "llvm.x86.avx.hsub.ps.256"]
3001 fn vhsubps(a: __m256, b: __m256) -> __m256;
3002 #[link_name = "llvm.x86.sse2.cmp.pd"]
3003 fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3004 #[link_name = "llvm.x86.avx.cmp.pd.256"]
3005 fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
3006 #[link_name = "llvm.x86.sse.cmp.ps"]
3007 fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
3008 #[link_name = "llvm.x86.avx.cmp.ps.256"]
3009 fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
3010 #[link_name = "llvm.x86.sse2.cmp.sd"]
3011 fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3012 #[link_name = "llvm.x86.sse.cmp.ss"]
3013 fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
3014 #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
3015 fn vcvtps2dq(a: __m256) -> i32x8;
3016 #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
3017 fn vcvttpd2dq(a: __m256d) -> i32x4;
3018 #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
3019 fn vcvtpd2dq(a: __m256d) -> i32x4;
3020 #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
3021 fn vcvttps2dq(a: __m256) -> i32x8;
3022 #[link_name = "llvm.x86.avx.vzeroall"]
3023 fn vzeroall();
3024 #[link_name = "llvm.x86.avx.vzeroupper"]
3025 fn vzeroupper();
3026 #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
3027 fn vpermilps256(a: __m256, b: i32x8) -> __m256;
3028 #[link_name = "llvm.x86.avx.vpermilvar.ps"]
3029 fn vpermilps(a: __m128, b: i32x4) -> __m128;
3030 #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
3031 fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
3032 #[link_name = "llvm.x86.avx.vpermilvar.pd"]
3033 fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
3034 #[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
3035 fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
3036 #[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
3037 fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
3038 #[link_name = "llvm.x86.avx.vperm2f128.si.256"]
3039 fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
3040 #[link_name = "llvm.x86.avx.maskload.pd.256"]
3041 fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
3042 #[link_name = "llvm.x86.avx.maskstore.pd.256"]
3043 fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
3044 #[link_name = "llvm.x86.avx.maskload.pd"]
3045 fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
3046 #[link_name = "llvm.x86.avx.maskstore.pd"]
3047 fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
3048 #[link_name = "llvm.x86.avx.maskload.ps.256"]
3049 fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
3050 #[link_name = "llvm.x86.avx.maskstore.ps.256"]
3051 fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
3052 #[link_name = "llvm.x86.avx.maskload.ps"]
3053 fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
3054 #[link_name = "llvm.x86.avx.maskstore.ps"]
3055 fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
3056 #[link_name = "llvm.x86.avx.ldu.dq.256"]
3057 fn vlddqu(mem_addr: *const i8) -> i8x32;
3058 #[link_name = "llvm.x86.avx.rcp.ps.256"]
3059 fn vrcpps(a: __m256) -> __m256;
3060 #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3061 fn vrsqrtps(a: __m256) -> __m256;
3062 #[link_name = "llvm.x86.avx.ptestz.256"]
3063 fn ptestz256(a: i64x4, b: i64x4) -> i32;
3064 #[link_name = "llvm.x86.avx.ptestc.256"]
3065 fn ptestc256(a: i64x4, b: i64x4) -> i32;
3066 #[link_name = "llvm.x86.avx.ptestnzc.256"]
3067 fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3068 #[link_name = "llvm.x86.avx.vtestz.pd.256"]
3069 fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3070 #[link_name = "llvm.x86.avx.vtestc.pd.256"]
3071 fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3072 #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3073 fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3074 #[link_name = "llvm.x86.avx.vtestz.pd"]
3075 fn vtestzpd(a: __m128d, b: __m128d) -> i32;
3076 #[link_name = "llvm.x86.avx.vtestc.pd"]
3077 fn vtestcpd(a: __m128d, b: __m128d) -> i32;
3078 #[link_name = "llvm.x86.avx.vtestnzc.pd"]
3079 fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3080 #[link_name = "llvm.x86.avx.vtestz.ps.256"]
3081 fn vtestzps256(a: __m256, b: __m256) -> i32;
3082 #[link_name = "llvm.x86.avx.vtestc.ps.256"]
3083 fn vtestcps256(a: __m256, b: __m256) -> i32;
3084 #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3085 fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3086 #[link_name = "llvm.x86.avx.vtestz.ps"]
3087 fn vtestzps(a: __m128, b: __m128) -> i32;
3088 #[link_name = "llvm.x86.avx.vtestc.ps"]
3089 fn vtestcps(a: __m128, b: __m128) -> i32;
3090 #[link_name = "llvm.x86.avx.vtestnzc.ps"]
3091 fn vtestnzcps(a: __m128, b: __m128) -> i32;
3092 #[link_name = "llvm.x86.avx.min.ps.256"]
3093 fn vminps(a: __m256, b: __m256) -> __m256;
3094 #[link_name = "llvm.x86.avx.max.ps.256"]
3095 fn vmaxps(a: __m256, b: __m256) -> __m256;
3096 #[link_name = "llvm.x86.avx.min.pd.256"]
3097 fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3098 #[link_name = "llvm.x86.avx.max.pd.256"]
3099 fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3100}
3101
3102#[cfg(test)]
3103mod tests {
3104 use crate::hint::black_box;
3105 use crate::ptr;
3106 use stdarch_test::simd_test;
3107
3108 use crate::core_arch::x86::*;
3109
3110 #[simd_test(enable = "avx")]
3111 unsafe fn test_mm256_add_pd() {
3112 let a = _mm256_setr_pd(1., 2., 3., 4.);
3113 let b = _mm256_setr_pd(5., 6., 7., 8.);
3114 let r = _mm256_add_pd(a, b);
3115 let e = _mm256_setr_pd(6., 8., 10., 12.);
3116 assert_eq_m256d(r, e);
3117 }
3118
3119 #[simd_test(enable = "avx")]
3120 unsafe fn test_mm256_add_ps() {
3121 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3122 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3123 let r = _mm256_add_ps(a, b);
3124 let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
3125 assert_eq_m256(r, e);
3126 }
3127
3128 #[simd_test(enable = "avx")]
3129 unsafe fn test_mm256_and_pd() {
3130 let a = _mm256_set1_pd(1.);
3131 let b = _mm256_set1_pd(0.6);
3132 let r = _mm256_and_pd(a, b);
3133 let e = _mm256_set1_pd(0.5);
3134 assert_eq_m256d(r, e);
3135 }
3136
3137 #[simd_test(enable = "avx")]
3138 unsafe fn test_mm256_and_ps() {
3139 let a = _mm256_set1_ps(1.);
3140 let b = _mm256_set1_ps(0.6);
3141 let r = _mm256_and_ps(a, b);
3142 let e = _mm256_set1_ps(0.5);
3143 assert_eq_m256(r, e);
3144 }
3145
3146 #[simd_test(enable = "avx")]
3147 unsafe fn test_mm256_or_pd() {
3148 let a = _mm256_set1_pd(1.);
3149 let b = _mm256_set1_pd(0.6);
3150 let r = _mm256_or_pd(a, b);
3151 let e = _mm256_set1_pd(1.2);
3152 assert_eq_m256d(r, e);
3153 }
3154
3155 #[simd_test(enable = "avx")]
3156 unsafe fn test_mm256_or_ps() {
3157 let a = _mm256_set1_ps(1.);
3158 let b = _mm256_set1_ps(0.6);
3159 let r = _mm256_or_ps(a, b);
3160 let e = _mm256_set1_ps(1.2);
3161 assert_eq_m256(r, e);
3162 }
3163
3164 #[simd_test(enable = "avx")]
3165 unsafe fn test_mm256_shuffle_pd() {
3166 let a = _mm256_setr_pd(1., 4., 5., 8.);
3167 let b = _mm256_setr_pd(2., 3., 6., 7.);
3168 let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
3169 let e = _mm256_setr_pd(4., 3., 8., 7.);
3170 assert_eq_m256d(r, e);
3171 }
3172
3173 #[simd_test(enable = "avx")]
3174 unsafe fn test_mm256_shuffle_ps() {
3175 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3176 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3177 let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
3178 let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
3179 assert_eq_m256(r, e);
3180 }
3181
3182 #[simd_test(enable = "avx")]
3183 unsafe fn test_mm256_andnot_pd() {
3184 let a = _mm256_set1_pd(0.);
3185 let b = _mm256_set1_pd(0.6);
3186 let r = _mm256_andnot_pd(a, b);
3187 assert_eq_m256d(r, b);
3188 }
3189
3190 #[simd_test(enable = "avx")]
3191 unsafe fn test_mm256_andnot_ps() {
3192 let a = _mm256_set1_ps(0.);
3193 let b = _mm256_set1_ps(0.6);
3194 let r = _mm256_andnot_ps(a, b);
3195 assert_eq_m256(r, b);
3196 }
3197
3198 #[simd_test(enable = "avx")]
3199 unsafe fn test_mm256_max_pd() {
3200 let a = _mm256_setr_pd(1., 4., 5., 8.);
3201 let b = _mm256_setr_pd(2., 3., 6., 7.);
3202 let r = _mm256_max_pd(a, b);
3203 let e = _mm256_setr_pd(2., 4., 6., 8.);
3204 assert_eq_m256d(r, e);
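3205
3206 // Per the VMAXPD behavior: if the values being compared are both 0.0s
3207 // (of either sign), the value in the second operand (source operand) is returned.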
3205 let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3208 let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3209 let wu: [u64; 4] = transmute(w);
3210 let xu: [u64; 4] = transmute(x);
3211 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3212 assert_eq!(xu, [0u64; 4]);
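3213
3214 // If only one value is a NaN (SNaN or QNaN) for this instruction, the
3215 // second operand (source operand), either a NaN or a valid
3216 // floating-point value, is written to the result.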
3213 let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3217 let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3218 let yf: [f64; 4] = transmute(y);
3219 let zf: [f64; 4] = transmute(z);
3220 assert_eq!(yf, [0.0; 4]);
3221 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3222 }
3223
3224 #[simd_test(enable = "avx")]
3225 unsafe fn test_mm256_max_ps() {
3226 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3227 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3228 let r = _mm256_max_ps(a, b);
3229 let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
3230 assert_eq_m256(r, e);
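3231
3232 // Per the VMAXPS behavior: if the values being compared are both 0.0s
3233 // (of either sign), the value in the second operand (source operand) is returned.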
3231 let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3234 let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3235 let wu: [u32; 8] = transmute(w);
3236 let xu: [u32; 8] = transmute(x);
3237 assert_eq!(wu, [0x8000_0000u32; 8]);
3238 assert_eq!(xu, [0u32; 8]);
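3239
3240 // If only one value is a NaN (SNaN or QNaN) for this instruction, the
3241 // second operand (source operand), either a NaN or a valid
3242 // floating-point value, is written to the result.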
3239 let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3243 let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3244 let yf: [f32; 8] = transmute(y);
3245 let zf: [f32; 8] = transmute(z);
3246 assert_eq!(yf, [0.0; 8]);
3247 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3248 }
3249
3250 #[simd_test(enable = "avx")]
3251 unsafe fn test_mm256_min_pd() {
3252 let a = _mm256_setr_pd(1., 4., 5., 8.);
3253 let b = _mm256_setr_pd(2., 3., 6., 7.);
3254 let r = _mm256_min_pd(a, b);
3255 let e = _mm256_setr_pd(1., 3., 5., 7.);
3256 assert_eq_m256d(r, e);
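3257
3258 // Per the VMINPD behavior: if the values being compared are both 0.0s
3259 // (of either sign), the value in the second operand (source operand) is returned.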
3257 let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3260 let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3261 let wu: [u64; 4] = transmute(w);
3262 let xu: [u64; 4] = transmute(x);
3263 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3264 assert_eq!(xu, [0u64; 4]);
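3265
3266 // If only one value is a NaN (SNaN or QNaN) for this instruction, the
3267 // second operand (source operand), either a NaN or a valid
3268 // floating-point value, is written to the result.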
3265 let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3269 let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3270 let yf: [f64; 4] = transmute(y);
3271 let zf: [f64; 4] = transmute(z);
3272 assert_eq!(yf, [0.0; 4]);
3273 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3274 }
3275
3276 #[simd_test(enable = "avx")]
3277 unsafe fn test_mm256_min_ps() {
3278 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3279 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3280 let r = _mm256_min_ps(a, b);
3281 let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
3282 assert_eq_m256(r, e);
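3283
3284 // Per the VMINPS behavior: if the values being compared are both 0.0s
3285 // (of either sign), the value in the second operand (source operand) is returned.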
3283 let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3286 let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3287 let wu: [u32; 8] = transmute(w);
3288 let xu: [u32; 8] = transmute(x);
3289 assert_eq!(wu, [0x8000_0000u32; 8]);
3290 assert_eq!(xu, [0u32; 8]);
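3291
3292 // If only one value is a NaN (SNaN or QNaN) for this instruction, the
3293 // second operand (source operand), either a NaN or a valid
3294 // floating-point value, is written to the result.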
3291 let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3295 let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3296 let yf: [f32; 8] = transmute(y);
3297 let zf: [f32; 8] = transmute(z);
3298 assert_eq!(yf, [0.0; 8]);
3299 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3300 }
3301
3302 #[simd_test(enable = "avx")]
3303 unsafe fn test_mm256_mul_pd() {
3304 let a = _mm256_setr_pd(1., 2., 3., 4.);
3305 let b = _mm256_setr_pd(5., 6., 7., 8.);
3306 let r = _mm256_mul_pd(a, b);
3307 let e = _mm256_setr_pd(5., 12., 21., 32.);
3308 assert_eq_m256d(r, e);
3309 }
3310
3311 #[simd_test(enable = "avx")]
3312 unsafe fn test_mm256_mul_ps() {
3313 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3314 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3315 let r = _mm256_mul_ps(a, b);
3316 let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
3317 assert_eq_m256(r, e);
3318 }
3319
3320 #[simd_test(enable = "avx")]
3321 unsafe fn test_mm256_addsub_pd() {
3322 let a = _mm256_setr_pd(1., 2., 3., 4.);
3323 let b = _mm256_setr_pd(5., 6., 7., 8.);
3324 let r = _mm256_addsub_pd(a, b);
3325 let e = _mm256_setr_pd(-4., 8., -4., 12.);
3326 assert_eq_m256d(r, e);
3327 }
3328
3329 #[simd_test(enable = "avx")]
3330 unsafe fn test_mm256_addsub_ps() {
3331 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3332 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3333 let r = _mm256_addsub_ps(a, b);
3334 let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
3335 assert_eq_m256(r, e);
3336 }
3337
3338 #[simd_test(enable = "avx")]
3339 unsafe fn test_mm256_sub_pd() {
3340 let a = _mm256_setr_pd(1., 2., 3., 4.);
3341 let b = _mm256_setr_pd(5., 6., 7., 8.);
3342 let r = _mm256_sub_pd(a, b);
3343 let e = _mm256_setr_pd(-4., -4., -4., -4.);
3344 assert_eq_m256d(r, e);
3345 }
3346
3347 #[simd_test(enable = "avx")]
3348 unsafe fn test_mm256_sub_ps() {
3349 let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
3350 let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
3351 let r = _mm256_sub_ps(a, b);
3352 let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
3353 assert_eq_m256(r, e);
3354 }
3355
3356 #[simd_test(enable = "avx")]
3357 unsafe fn test_mm256_round_pd() {
3358 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3359 let result_closest = _mm256_round_pd::<0b0000>(a);
3360 let result_down = _mm256_round_pd::<0b0001>(a);
3361 let result_up = _mm256_round_pd::<0b0010>(a);
3362 let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
3363 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3364 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3365 assert_eq_m256d(result_closest, expected_closest);
3366 assert_eq_m256d(result_down, expected_down);
3367 assert_eq_m256d(result_up, expected_up);
3368 }
3369
3370 #[simd_test(enable = "avx")]
3371 unsafe fn test_mm256_floor_pd() {
3372 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3373 let result_down = _mm256_floor_pd(a);
3374 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3375 assert_eq_m256d(result_down, expected_down);
3376 }
3377
3378 #[simd_test(enable = "avx")]
3379 unsafe fn test_mm256_ceil_pd() {
3380 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3381 let result_up = _mm256_ceil_pd(a);
3382 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3383 assert_eq_m256d(result_up, expected_up);
3384 }
3385
3386 #[simd_test(enable = "avx")]
3387 unsafe fn test_mm256_round_ps() {
3388 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3389 let result_closest = _mm256_round_ps::<0b0000>(a);
3390 let result_down = _mm256_round_ps::<0b0001>(a);
3391 let result_up = _mm256_round_ps::<0b0010>(a);
3392 let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
3393 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3394 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3395 assert_eq_m256(result_closest, expected_closest);
3396 assert_eq_m256(result_down, expected_down);
3397 assert_eq_m256(result_up, expected_up);
3398 }
3399
3400 #[simd_test(enable = "avx")]
3401 unsafe fn test_mm256_floor_ps() {
3402 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3403 let result_down = _mm256_floor_ps(a);
3404 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3405 assert_eq_m256(result_down, expected_down);
3406 }
3407
3408 #[simd_test(enable = "avx")]
3409 unsafe fn test_mm256_ceil_ps() {
3410 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3411 let result_up = _mm256_ceil_ps(a);
3412 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3413 assert_eq_m256(result_up, expected_up);
3414 }
3415
3416 #[simd_test(enable = "avx")]
3417 unsafe fn test_mm256_sqrt_pd() {
3418 let a = _mm256_setr_pd(4., 9., 16., 25.);
3419 let r = _mm256_sqrt_pd(a);
3420 let e = _mm256_setr_pd(2., 3., 4., 5.);
3421 assert_eq_m256d(r, e);
3422 }
3423
3424 #[simd_test(enable = "avx")]
3425 unsafe fn test_mm256_sqrt_ps() {
3426 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3427 let r = _mm256_sqrt_ps(a);
3428 let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
3429 assert_eq_m256(r, e);
3430 }
3431
3432 #[simd_test(enable = "avx")]
3433 unsafe fn test_mm256_div_ps() {
3434 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3435 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3436 let r = _mm256_div_ps(a, b);
3437 let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
3438 assert_eq_m256(r, e);
3439 }
3440
3441 #[simd_test(enable = "avx")]
3442 unsafe fn test_mm256_div_pd() {
3443 let a = _mm256_setr_pd(4., 9., 16., 25.);
3444 let b = _mm256_setr_pd(4., 3., 2., 5.);
3445 let r = _mm256_div_pd(a, b);
3446 let e = _mm256_setr_pd(1., 3., 8., 5.);
3447 assert_eq_m256d(r, e);
3448 }
3449
3450 #[simd_test(enable = "avx")]
3451 unsafe fn test_mm256_blend_pd() {
3452 let a = _mm256_setr_pd(4., 9., 16., 25.);
3453 let b = _mm256_setr_pd(4., 3., 2., 5.);
3454 let r = _mm256_blend_pd::<0x0>(a, b);
3455 assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3456 let r = _mm256_blend_pd::<0x3>(a, b);
3457 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3458 let r = _mm256_blend_pd::<0xF>(a, b);
3459 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
3460 }
3461
3462 #[simd_test(enable = "avx")]
3463 unsafe fn test_mm256_blend_ps() {
3464 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3465 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3466 let r = _mm256_blend_ps::<0x0>(a, b);
3467 assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3468 let r = _mm256_blend_ps::<0x3>(a, b);
3469 assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3470 let r = _mm256_blend_ps::<0xF>(a, b);
3471 assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
3472 }
3473
3474 #[simd_test(enable = "avx")]
3475 unsafe fn test_mm256_blendv_pd() {
3476 let a = _mm256_setr_pd(4., 9., 16., 25.);
3477 let b = _mm256_setr_pd(4., 3., 2., 5.);
3478 let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
3479 let r = _mm256_blendv_pd(a, b, c);
3480 let e = _mm256_setr_pd(4., 9., 2., 5.);
3481 assert_eq_m256d(r, e);
3482 }
3483
3484 #[simd_test(enable = "avx")]
3485 unsafe fn test_mm256_blendv_ps() {
3486 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3487 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3488 #[rustfmt::skip]
3489 let c = _mm256_setr_ps(
3490 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
3491 );
3492 let r = _mm256_blendv_ps(a, b, c);
3493 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3494 assert_eq_m256(r, e);
3495 }
3496
3497 #[simd_test(enable = "avx")]
3498 unsafe fn test_mm256_dp_ps() {
3499 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3500 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3501 let r = _mm256_dp_ps::<0xFF>(a, b);
3502 let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
3503 assert_eq_m256(r, e);
3504 }
3505
3506 #[simd_test(enable = "avx")]
3507 unsafe fn test_mm256_hadd_pd() {
3508 let a = _mm256_setr_pd(4., 9., 16., 25.);
3509 let b = _mm256_setr_pd(4., 3., 2., 5.);
3510 let r = _mm256_hadd_pd(a, b);
3511 let e = _mm256_setr_pd(13., 7., 41., 7.);
3512 assert_eq_m256d(r, e);
3513
3514 let a = _mm256_setr_pd(1., 2., 3., 4.);
3515 let b = _mm256_setr_pd(5., 6., 7., 8.);
3516 let r = _mm256_hadd_pd(a, b);
3517 let e = _mm256_setr_pd(3., 11., 7., 15.);
3518 assert_eq_m256d(r, e);
3519 }
3520
3521 #[simd_test(enable = "avx")]
3522 unsafe fn test_mm256_hadd_ps() {
3523 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3524 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3525 let r = _mm256_hadd_ps(a, b);
3526 let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
3527 assert_eq_m256(r, e);
3528
3529 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3530 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3531 let r = _mm256_hadd_ps(a, b);
3532 let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
3533 assert_eq_m256(r, e);
3534 }
3535
3536 #[simd_test(enable = "avx")]
3537 unsafe fn test_mm256_hsub_pd() {
3538 let a = _mm256_setr_pd(4., 9., 16., 25.);
3539 let b = _mm256_setr_pd(4., 3., 2., 5.);
3540 let r = _mm256_hsub_pd(a, b);
3541 let e = _mm256_setr_pd(-5., 1., -9., -3.);
3542 assert_eq_m256d(r, e);
3543
3544 let a = _mm256_setr_pd(1., 2., 3., 4.);
3545 let b = _mm256_setr_pd(5., 6., 7., 8.);
3546 let r = _mm256_hsub_pd(a, b);
3547 let e = _mm256_setr_pd(-1., -1., -1., -1.);
3548 assert_eq_m256d(r, e);
3549 }
3550
3551 #[simd_test(enable = "avx")]
3552 unsafe fn test_mm256_hsub_ps() {
3553 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3554 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3555 let r = _mm256_hsub_ps(a, b);
3556 let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
3557 assert_eq_m256(r, e);
3558
3559 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3560 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3561 let r = _mm256_hsub_ps(a, b);
3562 let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
3563 assert_eq_m256(r, e);
3564 }
3565
3566 #[simd_test(enable = "avx")]
3567 unsafe fn test_mm256_xor_pd() {
3568 let a = _mm256_setr_pd(4., 9., 16., 25.);
3569 let b = _mm256_set1_pd(0.);
3570 let r = _mm256_xor_pd(a, b);
3571 assert_eq_m256d(r, a);
3572 }
3573
3574 #[simd_test(enable = "avx")]
3575 unsafe fn test_mm256_xor_ps() {
3576 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3577 let b = _mm256_set1_ps(0.);
3578 let r = _mm256_xor_ps(a, b);
3579 assert_eq_m256(r, a);
3580 }
3581
3582 #[simd_test(enable = "avx")]
3583 unsafe fn test_mm_cmp_pd() {
3584 let a = _mm_setr_pd(4., 9.);
3585 let b = _mm_setr_pd(4., 3.);
3586 let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3587 assert!(get_m128d(r, 0).is_nan());
3588 assert!(get_m128d(r, 1).is_nan());
3589 }
3590
3591 #[simd_test(enable = "avx")]
3592 unsafe fn test_mm256_cmp_pd() {
3593 let a = _mm256_setr_pd(1., 2., 3., 4.);
3594 let b = _mm256_setr_pd(5., 6., 7., 8.);
3595 let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3596 let e = _mm256_set1_pd(0.);
3597 assert_eq_m256d(r, e);
3598 }
3599
3600 #[simd_test(enable = "avx")]
3601 unsafe fn test_mm_cmp_ps() {
3602 let a = _mm_setr_ps(4., 3., 2., 5.);
3603 let b = _mm_setr_ps(4., 9., 16., 25.);
3604 let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3605 assert!(get_m128(r, 0).is_nan());
3606 assert_eq!(get_m128(r, 1), 0.);
3607 assert_eq!(get_m128(r, 2), 0.);
3608 assert_eq!(get_m128(r, 3), 0.);
3609 }
3610
3611 #[simd_test(enable = "avx")]
3612 unsafe fn test_mm256_cmp_ps() {
3613 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3614 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3615 let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3616 let e = _mm256_set1_ps(0.);
3617 assert_eq_m256(r, e);
3618 }
3619
3620 #[simd_test(enable = "avx")]
3621 unsafe fn test_mm_cmp_sd() {
3622 let a = _mm_setr_pd(4., 9.);
3623 let b = _mm_setr_pd(4., 3.);
3624 let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3625 assert!(get_m128d(r, 0).is_nan());
3626 assert_eq!(get_m128d(r, 1), 9.);
3627 }
3628
3629 #[simd_test(enable = "avx")]
3630 unsafe fn test_mm_cmp_ss() {
3631 let a = _mm_setr_ps(4., 3., 2., 5.);
3632 let b = _mm_setr_ps(4., 9., 16., 25.);
3633 let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3634 assert!(get_m128(r, 0).is_nan());
3635 assert_eq!(get_m128(r, 1), 3.);
3636 assert_eq!(get_m128(r, 2), 2.);
3637 assert_eq!(get_m128(r, 3), 5.);
3638 }
3639
3640 #[simd_test(enable = "avx")]
3641 unsafe fn test_mm256_cvtepi32_pd() {
3642 let a = _mm_setr_epi32(4, 9, 16, 25);
3643 let r = _mm256_cvtepi32_pd(a);
3644 let e = _mm256_setr_pd(4., 9., 16., 25.);
3645 assert_eq_m256d(r, e);
3646 }
3647
3648 #[simd_test(enable = "avx")]
3649 unsafe fn test_mm256_cvtepi32_ps() {
3650 let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3651 let r = _mm256_cvtepi32_ps(a);
3652 let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3653 assert_eq_m256(r, e);
3654 }
3655
3656 #[simd_test(enable = "avx")]
3657 unsafe fn test_mm256_cvtpd_ps() {
3658 let a = _mm256_setr_pd(4., 9., 16., 25.);
3659 let r = _mm256_cvtpd_ps(a);
3660 let e = _mm_setr_ps(4., 9., 16., 25.);
3661 assert_eq_m128(r, e);
3662 }
3663
3664 #[simd_test(enable = "avx")]
3665 unsafe fn test_mm256_cvtps_epi32() {
3666 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3667 let r = _mm256_cvtps_epi32(a);
3668 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3669 assert_eq_m256i(r, e);
3670 }
3671
3672 #[simd_test(enable = "avx")]
3673 unsafe fn test_mm256_cvtps_pd() {
3674 let a = _mm_setr_ps(4., 9., 16., 25.);
3675 let r = _mm256_cvtps_pd(a);
3676 let e = _mm256_setr_pd(4., 9., 16., 25.);
3677 assert_eq_m256d(r, e);
3678 }
3679
3680 #[simd_test(enable = "avx")]
3681 unsafe fn test_mm256_cvtsd_f64() {
3682 let a = _mm256_setr_pd(1., 2., 3., 4.);
3683 let r = _mm256_cvtsd_f64(a);
3684 assert_eq!(r, 1.);
3685 }
3686
3687 #[simd_test(enable = "avx")]
3688 unsafe fn test_mm256_cvttpd_epi32() {
3689 let a = _mm256_setr_pd(4., 9., 16., 25.);
3690 let r = _mm256_cvttpd_epi32(a);
3691 let e = _mm_setr_epi32(4, 9, 16, 25);
3692 assert_eq_m128i(r, e);
3693 }
3694
3695 #[simd_test(enable = "avx")]
3696 unsafe fn test_mm256_cvtpd_epi32() {
3697 let a = _mm256_setr_pd(4., 9., 16., 25.);
3698 let r = _mm256_cvtpd_epi32(a);
3699 let e = _mm_setr_epi32(4, 9, 16, 25);
3700 assert_eq_m128i(r, e);
3701 }
3702
3703 #[simd_test(enable = "avx")]
3704 unsafe fn test_mm256_cvttps_epi32() {
3705 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3706 let r = _mm256_cvttps_epi32(a);
3707 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3708 assert_eq_m256i(r, e);
3709 }
3710
3711 #[simd_test(enable = "avx")]
3712 unsafe fn test_mm256_extractf128_ps() {
3713 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3714 let r = _mm256_extractf128_ps::<0>(a);
3715 let e = _mm_setr_ps(4., 3., 2., 5.);
3716 assert_eq_m128(r, e);
3717 }
3718
3719 #[simd_test(enable = "avx")]
3720 unsafe fn test_mm256_extractf128_pd() {
3721 let a = _mm256_setr_pd(4., 3., 2., 5.);
3722 let r = _mm256_extractf128_pd::<0>(a);
3723 let e = _mm_setr_pd(4., 3.);
3724 assert_eq_m128d(r, e);
3725 }
3726
3727 #[simd_test(enable = "avx")]
3728 unsafe fn test_mm256_extractf128_si256() {
3729 let a = _mm256_setr_epi64x(4, 3, 2, 5);
3730 let r = _mm256_extractf128_si256::<0>(a);
3731 let e = _mm_setr_epi64x(4, 3);
3732 assert_eq_m128i(r, e);
3733 }
3734
3735 #[simd_test(enable = "avx")]
3736 unsafe fn test_mm256_extract_epi32() {
3737 let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
3738 let r1 = _mm256_extract_epi32::<0>(a);
3739 let r2 = _mm256_extract_epi32::<3>(a);
3740 assert_eq!(r1, -1);
3741 assert_eq!(r2, 3);
3742 }
3743
3744 #[simd_test(enable = "avx")]
3745 unsafe fn test_mm256_cvtsi256_si32() {
3746 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3747 let r = _mm256_cvtsi256_si32(a);
3748 assert_eq!(r, 1);
3749 }
3750
3751 #[simd_test(enable = "avx")]
3752 #[cfg_attr(miri, ignore)]
3753 unsafe fn test_mm256_zeroall() {
3754 _mm256_zeroall();
3755 }
3756
3757 #[simd_test(enable = "avx")]
3758 #[cfg_attr(miri, ignore)]
3759 unsafe fn test_mm256_zeroupper() {
3760 _mm256_zeroupper();
3761 }
3762
3763 #[simd_test(enable = "avx")]
3764 unsafe fn test_mm256_permutevar_ps() {
3765 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3766 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3767 let r = _mm256_permutevar_ps(a, b);
3768 let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
3769 assert_eq_m256(r, e);
3770 }
3771
3772 #[simd_test(enable = "avx")]
3773 unsafe fn test_mm_permutevar_ps() {
3774 let a = _mm_setr_ps(4., 3., 2., 5.);
3775 let b = _mm_setr_epi32(1, 2, 3, 4);
3776 let r = _mm_permutevar_ps(a, b);
3777 let e = _mm_setr_ps(3., 2., 5., 4.);
3778 assert_eq_m128(r, e);
3779 }
3780
3781 #[simd_test(enable = "avx")]
3782 unsafe fn test_mm256_permute_ps() {
3783 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3784 let r = _mm256_permute_ps::<0x1b>(a);
3785 let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
3786 assert_eq_m256(r, e);
3787 }
3788
3789 #[simd_test(enable = "avx")]
3790 unsafe fn test_mm_permute_ps() {
3791 let a = _mm_setr_ps(4., 3., 2., 5.);
3792 let r = _mm_permute_ps::<0x1b>(a);
3793 let e = _mm_setr_ps(5., 2., 3., 4.);
3794 assert_eq_m128(r, e);
3795 }
3796
3797 #[simd_test(enable = "avx")]
3798 unsafe fn test_mm256_permutevar_pd() {
3799 let a = _mm256_setr_pd(4., 3., 2., 5.);
3800 let b = _mm256_setr_epi64x(1, 2, 3, 4);
3801 let r = _mm256_permutevar_pd(a, b);
3802 let e = _mm256_setr_pd(4., 3., 5., 2.);
3803 assert_eq_m256d(r, e);
3804 }
3805
3806 #[simd_test(enable = "avx")]
3807 unsafe fn test_mm_permutevar_pd() {
3808 let a = _mm_setr_pd(4., 3.);
3809 let b = _mm_setr_epi64x(3, 0);
3810 let r = _mm_permutevar_pd(a, b);
3811 let e = _mm_setr_pd(3., 4.);
3812 assert_eq_m128d(r, e);
3813 }
3814
3815 #[simd_test(enable = "avx")]
3816 unsafe fn test_mm256_permute_pd() {
3817 let a = _mm256_setr_pd(4., 3., 2., 5.);
3818 let r = _mm256_permute_pd::<5>(a);
3819 let e = _mm256_setr_pd(3., 4., 5., 2.);
3820 assert_eq_m256d(r, e);
3821 }
3822
3823 #[simd_test(enable = "avx")]
3824 unsafe fn test_mm_permute_pd() {
3825 let a = _mm_setr_pd(4., 3.);
3826 let r = _mm_permute_pd::<1>(a);
3827 let e = _mm_setr_pd(3., 4.);
3828 assert_eq_m128d(r, e);
3829 }
3830
3831 #[simd_test(enable = "avx")]
3832 unsafe fn test_mm256_permute2f128_ps() {
3833 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3834 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3835 let r = _mm256_permute2f128_ps::<0x13>(a, b);
3836 let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
3837 assert_eq_m256(r, e);
3838 }
3839
3840 #[simd_test(enable = "avx")]
3841 unsafe fn test_mm256_permute2f128_pd() {
3842 let a = _mm256_setr_pd(1., 2., 3., 4.);
3843 let b = _mm256_setr_pd(5., 6., 7., 8.);
3844 let r = _mm256_permute2f128_pd::<0x31>(a, b);
3845 let e = _mm256_setr_pd(3., 4., 7., 8.);
3846 assert_eq_m256d(r, e);
3847 }
3848
3849 #[simd_test(enable = "avx")]
3850 unsafe fn test_mm256_permute2f128_si256() {
3851 let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
3852 let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
3853 let r = _mm256_permute2f128_si256::<0x20>(a, b);
3854 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3855 assert_eq_m256i(r, e);
3856 }
3857
3858 #[simd_test(enable = "avx")]
3859 unsafe fn test_mm256_broadcast_ss() {
3860 let r = _mm256_broadcast_ss(&3.);
3861 let e = _mm256_set1_ps(3.);
3862 assert_eq_m256(r, e);
3863 }
3864
3865 #[simd_test(enable = "avx")]
3866 unsafe fn test_mm_broadcast_ss() {
3867 let r = _mm_broadcast_ss(&3.);
3868 let e = _mm_set1_ps(3.);
3869 assert_eq_m128(r, e);
3870 }
3871
3872 #[simd_test(enable = "avx")]
3873 unsafe fn test_mm256_broadcast_sd() {
3874 let r = _mm256_broadcast_sd(&3.);
3875 let e = _mm256_set1_pd(3.);
3876 assert_eq_m256d(r, e);
3877 }
3878
3879 #[simd_test(enable = "avx")]
3880 unsafe fn test_mm256_broadcast_ps() {
3881 let a = _mm_setr_ps(4., 3., 2., 5.);
3882 let r = _mm256_broadcast_ps(&a);
3883 let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
3884 assert_eq_m256(r, e);
3885 }
3886
3887 #[simd_test(enable = "avx")]
3888 unsafe fn test_mm256_broadcast_pd() {
3889 let a = _mm_setr_pd(4., 3.);
3890 let r = _mm256_broadcast_pd(&a);
3891 let e = _mm256_setr_pd(4., 3., 4., 3.);
3892 assert_eq_m256d(r, e);
3893 }
3894
3895 #[simd_test(enable = "avx")]
3896 unsafe fn test_mm256_insertf128_ps() {
3897 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3898 let b = _mm_setr_ps(4., 9., 16., 25.);
3899 let r = _mm256_insertf128_ps::<0>(a, b);
3900 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3901 assert_eq_m256(r, e);
3902 }
3903
3904 #[simd_test(enable = "avx")]
3905 unsafe fn test_mm256_insertf128_pd() {
3906 let a = _mm256_setr_pd(1., 2., 3., 4.);
3907 let b = _mm_setr_pd(5., 6.);
3908 let r = _mm256_insertf128_pd::<0>(a, b);
3909 let e = _mm256_setr_pd(5., 6., 3., 4.);
3910 assert_eq_m256d(r, e);
3911 }
3912
3913 #[simd_test(enable = "avx")]
3914 unsafe fn test_mm256_insertf128_si256() {
3915 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3916 let b = _mm_setr_epi64x(5, 6);
3917 let r = _mm256_insertf128_si256::<0>(a, b);
3918 let e = _mm256_setr_epi64x(5, 6, 3, 4);
3919 assert_eq_m256i(r, e);
3920 }
3921
3922 #[simd_test(enable = "avx")]
3923 unsafe fn test_mm256_insert_epi8() {
3924 #[rustfmt::skip]
3925 let a = _mm256_setr_epi8(
3926 1, 2, 3, 4, 5, 6, 7, 8,
3927 9, 10, 11, 12, 13, 14, 15, 16,
3928 17, 18, 19, 20, 21, 22, 23, 24,
3929 25, 26, 27, 28, 29, 30, 31, 32,
3930 );
3931 let r = _mm256_insert_epi8::<31>(a, 0);
3932 #[rustfmt::skip]
3933 let e = _mm256_setr_epi8(
3934 1, 2, 3, 4, 5, 6, 7, 8,
3935 9, 10, 11, 12, 13, 14, 15, 16,
3936 17, 18, 19, 20, 21, 22, 23, 24,
3937 25, 26, 27, 28, 29, 30, 31, 0,
3938 );
3939 assert_eq_m256i(r, e);
3940 }
3941
3942 #[simd_test(enable = "avx")]
3943 unsafe fn test_mm256_insert_epi16() {
3944 #[rustfmt::skip]
3945 let a = _mm256_setr_epi16(
3946 0, 1, 2, 3, 4, 5, 6, 7,
3947 8, 9, 10, 11, 12, 13, 14, 15,
3948 );
3949 let r = _mm256_insert_epi16::<15>(a, 0);
3950 #[rustfmt::skip]
3951 let e = _mm256_setr_epi16(
3952 0, 1, 2, 3, 4, 5, 6, 7,
3953 8, 9, 10, 11, 12, 13, 14, 0,
3954 );
3955 assert_eq_m256i(r, e);
3956 }
3957
3958 #[simd_test(enable = "avx")]
3959 unsafe fn test_mm256_insert_epi32() {
3960 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3961 let r = _mm256_insert_epi32::<7>(a, 0);
3962 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
3963 assert_eq_m256i(r, e);
3964 }
3965
3966 #[simd_test(enable = "avx")]
3967 unsafe fn test_mm256_load_pd() {
3968 let a = _mm256_setr_pd(1., 2., 3., 4.);
3969 let p = ptr::addr_of!(a) as *const f64;
3970 let r = _mm256_load_pd(p);
3971 let e = _mm256_setr_pd(1., 2., 3., 4.);
3972 assert_eq_m256d(r, e);
3973 }
3974
3975 #[simd_test(enable = "avx")]
3976 unsafe fn test_mm256_store_pd() {
3977 let a = _mm256_setr_pd(1., 2., 3., 4.);
3978 let mut r = _mm256_undefined_pd();
3979 _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
3980 assert_eq_m256d(r, a);
3981 }
3982
3983 #[simd_test(enable = "avx")]
3984 unsafe fn test_mm256_load_ps() {
3985 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3986 let p = ptr::addr_of!(a) as *const f32;
3987 let r = _mm256_load_ps(p);
3988 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3989 assert_eq_m256(r, e);
3990 }
3991
3992 #[simd_test(enable = "avx")]
3993 unsafe fn test_mm256_store_ps() {
3994 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3995 let mut r = _mm256_undefined_ps();
3996 _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
3997 assert_eq_m256(r, a);
3998 }
3999
4000 #[simd_test(enable = "avx")]
4001 unsafe fn test_mm256_loadu_pd() {
4002 let a = &[1.0f64, 2., 3., 4.];
4003 let p = a.as_ptr();
4004 let r = _mm256_loadu_pd(black_box(p));
4005 let e = _mm256_setr_pd(1., 2., 3., 4.);
4006 assert_eq_m256d(r, e);
4007 }
4008
4009 #[simd_test(enable = "avx")]
4010 unsafe fn test_mm256_storeu_pd() {
4011 let a = _mm256_set1_pd(9.);
4012 let mut r = _mm256_undefined_pd();
4013 _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4014 assert_eq_m256d(r, a);
4015 }
4016
4017 #[simd_test(enable = "avx")]
4018 unsafe fn test_mm256_loadu_ps() {
4019 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
4020 let p = a.as_ptr();
4021 let r = _mm256_loadu_ps(black_box(p));
4022 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4023 assert_eq_m256(r, e);
4024 }
4025
4026 #[simd_test(enable = "avx")]
4027 unsafe fn test_mm256_storeu_ps() {
4028 let a = _mm256_set1_ps(9.);
4029 let mut r = _mm256_undefined_ps();
4030 _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4031 assert_eq_m256(r, a);
4032 }
4033
4034 #[simd_test(enable = "avx")]
4035 unsafe fn test_mm256_load_si256() {
4036 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4037 let p = ptr::addr_of!(a);
4038 let r = _mm256_load_si256(p);
4039 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4040 assert_eq_m256i(r, e);
4041 }
4042
4043 #[simd_test(enable = "avx")]
4044 unsafe fn test_mm256_store_si256() {
4045 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4046 let mut r = _mm256_undefined_si256();
4047 _mm256_store_si256(ptr::addr_of_mut!(r), a);
4048 assert_eq_m256i(r, a);
4049 }
4050
4051 #[simd_test(enable = "avx")]
4052 unsafe fn test_mm256_loadu_si256() {
4053 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4054 let p = ptr::addr_of!(a);
4055 let r = _mm256_loadu_si256(black_box(p));
4056 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4057 assert_eq_m256i(r, e);
4058 }
4059
4060 #[simd_test(enable = "avx")]
4061 unsafe fn test_mm256_storeu_si256() {
4062 let a = _mm256_set1_epi8(9);
4063 let mut r = _mm256_undefined_si256();
4064 _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
4065 assert_eq_m256i(r, a);
4066 }
4067
4068 #[simd_test(enable = "avx")]
4069 unsafe fn test_mm256_maskload_pd() {
4070 let a = &[1.0f64, 2., 3., 4.];
4071 let p = a.as_ptr();
4072 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4073 let r = _mm256_maskload_pd(black_box(p), mask);
4074 let e = _mm256_setr_pd(0., 2., 0., 4.);
4075 assert_eq_m256d(r, e);
4076 }
4077
4078 #[simd_test(enable = "avx")]
4079 unsafe fn test_mm256_maskstore_pd() {
4080 let mut r = _mm256_set1_pd(0.);
4081 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4082 let a = _mm256_setr_pd(1., 2., 3., 4.);
4083 _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4084 let e = _mm256_setr_pd(0., 2., 0., 4.);
4085 assert_eq_m256d(r, e);
4086 }
4087
4088 #[simd_test(enable = "avx")]
4089 unsafe fn test_mm_maskload_pd() {
4090 let a = &[1.0f64, 2.];
4091 let p = a.as_ptr();
4092 let mask = _mm_setr_epi64x(0, !0);
4093 let r = _mm_maskload_pd(black_box(p), mask);
4094 let e = _mm_setr_pd(0., 2.);
4095 assert_eq_m128d(r, e);
4096 }
4097
4098 #[simd_test(enable = "avx")]
4099 unsafe fn test_mm_maskstore_pd() {
4100 let mut r = _mm_set1_pd(0.);
4101 let mask = _mm_setr_epi64x(0, !0);
4102 let a = _mm_setr_pd(1., 2.);
4103 _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4104 let e = _mm_setr_pd(0., 2.);
4105 assert_eq_m128d(r, e);
4106 }
4107
4108 #[simd_test(enable = "avx")]
4109 unsafe fn test_mm256_maskload_ps() {
4110 let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
4111 let p = a.as_ptr();
4112 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4113 let r = _mm256_maskload_ps(black_box(p), mask);
4114 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4115 assert_eq_m256(r, e);
4116 }
4117
4118 #[simd_test(enable = "avx")]
4119 unsafe fn test_mm256_maskstore_ps() {
4120 let mut r = _mm256_set1_ps(0.);
4121 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4122 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4123 _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4124 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4125 assert_eq_m256(r, e);
4126 }
4127
4128 #[simd_test(enable = "avx")]
4129 unsafe fn test_mm_maskload_ps() {
4130 let a = &[1.0f32, 2., 3., 4.];
4131 let p = a.as_ptr();
4132 let mask = _mm_setr_epi32(0, !0, 0, !0);
4133 let r = _mm_maskload_ps(black_box(p), mask);
4134 let e = _mm_setr_ps(0., 2., 0., 4.);
4135 assert_eq_m128(r, e);
4136 }
4137
4138 #[simd_test(enable = "avx")]
4139 unsafe fn test_mm_maskstore_ps() {
4140 let mut r = _mm_set1_ps(0.);
4141 let mask = _mm_setr_epi32(0, !0, 0, !0);
4142 let a = _mm_setr_ps(1., 2., 3., 4.);
4143 _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4144 let e = _mm_setr_ps(0., 2., 0., 4.);
4145 assert_eq_m128(r, e);
4146 }
4147
4148 #[simd_test(enable = "avx")]
4149 unsafe fn test_mm256_movehdup_ps() {
4150 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4151 let r = _mm256_movehdup_ps(a);
4152 let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
4153 assert_eq_m256(r, e);
4154 }
4155
4156 #[simd_test(enable = "avx")]
4157 unsafe fn test_mm256_moveldup_ps() {
4158 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4159 let r = _mm256_moveldup_ps(a);
4160 let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
4161 assert_eq_m256(r, e);
4162 }
4163
4164 #[simd_test(enable = "avx")]
4165 unsafe fn test_mm256_movedup_pd() {
4166 let a = _mm256_setr_pd(1., 2., 3., 4.);
4167 let r = _mm256_movedup_pd(a);
4168 let e = _mm256_setr_pd(1., 1., 3., 3.);
4169 assert_eq_m256d(r, e);
4170 }
4171
4172 #[simd_test(enable = "avx")]
4173 unsafe fn test_mm256_lddqu_si256() {
4174 #[rustfmt::skip]
4175 let a = _mm256_setr_epi8(
4176 1, 2, 3, 4, 5, 6, 7, 8,
4177 9, 10, 11, 12, 13, 14, 15, 16,
4178 17, 18, 19, 20, 21, 22, 23, 24,
4179 25, 26, 27, 28, 29, 30, 31, 32,
4180 );
4181 let p = ptr::addr_of!(a);
4182 let r = _mm256_lddqu_si256(black_box(p));
4183 #[rustfmt::skip]
4184 let e = _mm256_setr_epi8(
4185 1, 2, 3, 4, 5, 6, 7, 8,
4186 9, 10, 11, 12, 13, 14, 15, 16,
4187 17, 18, 19, 20, 21, 22, 23, 24,
4188 25, 26, 27, 28, 29, 30, 31, 32,
4189 );
4190 assert_eq_m256i(r, e);
4191 }
4192
4193 #[simd_test(enable = "avx")]
4194 #[cfg_attr(miri, ignore)]
4195 unsafe fn test_mm256_stream_si256() {
4196 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4197 let mut r = _mm256_undefined_si256();
4198 _mm256_stream_si256(ptr::addr_of_mut!(r), a);
4199 assert_eq_m256i(r, a);
4200 }
4201
4202 #[simd_test(enable = "avx")]
4203 #[cfg_attr(miri, ignore)]
4204 unsafe fn test_mm256_stream_pd() {
4205 #[repr(align(32))]
4206 struct Memory {
4207 pub data: [f64; 4],
4208 }
4209 let a = _mm256_set1_pd(7.0);
4210 let mut mem = Memory { data: [-1.0; 4] };
4211
4212 _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4213 for i in 0..4 {
4214 assert_eq!(mem.data[i], get_m256d(a, i));
4215 }
4216 }
4217
4218 #[simd_test(enable = "avx")]
4219 #[cfg_attr(miri, ignore)]
4220 unsafe fn test_mm256_stream_ps() {
4221 #[repr(align(32))]
4222 struct Memory {
4223 pub data: [f32; 8],
4224 }
4225 let a = _mm256_set1_ps(7.0);
4226 let mut mem = Memory { data: [-1.0; 8] };
4227
4228 _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
4229 for i in 0..8 {
4230 assert_eq!(mem.data[i], get_m256(a, i));
4231 }
4232 }
4233
4234 #[simd_test(enable = "avx")]
4235 unsafe fn test_mm256_rcp_ps() {
4236 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4237 let r = _mm256_rcp_ps(a);
4238 #[rustfmt::skip]
4239 let e = _mm256_setr_ps(
4240 0.99975586, 0.49987793, 0.33325195, 0.24993896,
4241 0.19995117, 0.16662598, 0.14282227, 0.12496948,
4242 );
4243 let rel_err = 0.00048828125;
4244 for i in 0..8 {
4245 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4246 }
4247 }
4248
4249 #[simd_test(enable = "avx")]
4250 unsafe fn test_mm256_rsqrt_ps() {
4251 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4252 let r = _mm256_rsqrt_ps(a);
4253 #[rustfmt::skip]
4254 let e = _mm256_setr_ps(
4255 0.99975586, 0.7069092, 0.5772705, 0.49987793,
4256 0.44714355, 0.40820313, 0.3779297, 0.3534546,
4257 );
4258 let rel_err = 0.00048828125;
4259 for i in 0..8 {
4260 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4261 }
4262 }
4263
4264 #[simd_test(enable = "avx")]
4265 unsafe fn test_mm256_unpackhi_pd() {
4266 let a = _mm256_setr_pd(1., 2., 3., 4.);
4267 let b = _mm256_setr_pd(5., 6., 7., 8.);
4268 let r = _mm256_unpackhi_pd(a, b);
4269 let e = _mm256_setr_pd(2., 6., 4., 8.);
4270 assert_eq_m256d(r, e);
4271 }
4272
4273 #[simd_test(enable = "avx")]
4274 unsafe fn test_mm256_unpackhi_ps() {
4275 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4276 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4277 let r = _mm256_unpackhi_ps(a, b);
4278 let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
4279 assert_eq_m256(r, e);
4280 }
4281
4282 #[simd_test(enable = "avx")]
4283 unsafe fn test_mm256_unpacklo_pd() {
4284 let a = _mm256_setr_pd(1., 2., 3., 4.);
4285 let b = _mm256_setr_pd(5., 6., 7., 8.);
4286 let r = _mm256_unpacklo_pd(a, b);
4287 let e = _mm256_setr_pd(1., 5., 3., 7.);
4288 assert_eq_m256d(r, e);
4289 }
4290
4291 #[simd_test(enable = "avx")]
4292 unsafe fn test_mm256_unpacklo_ps() {
4293 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4294 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4295 let r = _mm256_unpacklo_ps(a, b);
4296 let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
4297 assert_eq_m256(r, e);
4298 }
4299
4300 #[simd_test(enable = "avx")]
4301 unsafe fn test_mm256_testz_si256() {
4302 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4303 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4304 let r = _mm256_testz_si256(a, b);
4305 assert_eq!(r, 0);
4306 let b = _mm256_set1_epi64x(0);
4307 let r = _mm256_testz_si256(a, b);
4308 assert_eq!(r, 1);
4309 }
4310
4311 #[simd_test(enable = "avx")]
4312 unsafe fn test_mm256_testc_si256() {
4313 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4314 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4315 let r = _mm256_testc_si256(a, b);
4316 assert_eq!(r, 0);
4317 let b = _mm256_set1_epi64x(0);
4318 let r = _mm256_testc_si256(a, b);
4319 assert_eq!(r, 1);
4320 }
4321
4322 #[simd_test(enable = "avx")]
4323 unsafe fn test_mm256_testnzc_si256() {
4324 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4325 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4326 let r = _mm256_testnzc_si256(a, b);
4327 assert_eq!(r, 1);
4328 let a = _mm256_setr_epi64x(0, 0, 0, 0);
4329 let b = _mm256_setr_epi64x(0, 0, 0, 0);
4330 let r = _mm256_testnzc_si256(a, b);
4331 assert_eq!(r, 0);
4332 }
4333
4334 #[simd_test(enable = "avx")]
4335 unsafe fn test_mm256_testz_pd() {
4336 let a = _mm256_setr_pd(1., 2., 3., 4.);
4337 let b = _mm256_setr_pd(5., 6., 7., 8.);
4338 let r = _mm256_testz_pd(a, b);
4339 assert_eq!(r, 1);
4340 let a = _mm256_set1_pd(-1.);
4341 let r = _mm256_testz_pd(a, a);
4342 assert_eq!(r, 0);
4343 }
4344
4345 #[simd_test(enable = "avx")]
4346 unsafe fn test_mm256_testc_pd() {
4347 let a = _mm256_setr_pd(1., 2., 3., 4.);
4348 let b = _mm256_setr_pd(5., 6., 7., 8.);
4349 let r = _mm256_testc_pd(a, b);
4350 assert_eq!(r, 1);
4351 let a = _mm256_set1_pd(1.);
4352 let b = _mm256_set1_pd(-1.);
4353 let r = _mm256_testc_pd(a, b);
4354 assert_eq!(r, 0);
4355 }
4356
4357 #[simd_test(enable = "avx")]
4358 unsafe fn test_mm256_testnzc_pd() {
4359 let a = _mm256_setr_pd(1., 2., 3., 4.);
4360 let b = _mm256_setr_pd(5., 6., 7., 8.);
4361 let r = _mm256_testnzc_pd(a, b);
4362 assert_eq!(r, 0);
4363 let a = _mm256_setr_pd(1., -1., -1., -1.);
4364 let b = _mm256_setr_pd(-1., -1., 1., 1.);
4365 let r = _mm256_testnzc_pd(a, b);
4366 assert_eq!(r, 1);
4367 }
4368
4369 #[simd_test(enable = "avx")]
4370 unsafe fn test_mm_testz_pd() {
4371 let a = _mm_setr_pd(1., 2.);
4372 let b = _mm_setr_pd(5., 6.);
4373 let r = _mm_testz_pd(a, b);
4374 assert_eq!(r, 1);
4375 let a = _mm_set1_pd(-1.);
4376 let r = _mm_testz_pd(a, a);
4377 assert_eq!(r, 0);
4378 }
4379
4380 #[simd_test(enable = "avx")]
4381 unsafe fn test_mm_testc_pd() {
4382 let a = _mm_setr_pd(1., 2.);
4383 let b = _mm_setr_pd(5., 6.);
4384 let r = _mm_testc_pd(a, b);
4385 assert_eq!(r, 1);
4386 let a = _mm_set1_pd(1.);
4387 let b = _mm_set1_pd(-1.);
4388 let r = _mm_testc_pd(a, b);
4389 assert_eq!(r, 0);
4390 }
4391
4392 #[simd_test(enable = "avx")]
4393 unsafe fn test_mm_testnzc_pd() {
4394 let a = _mm_setr_pd(1., 2.);
4395 let b = _mm_setr_pd(5., 6.);
4396 let r = _mm_testnzc_pd(a, b);
4397 assert_eq!(r, 0);
4398 let a = _mm_setr_pd(1., -1.);
4399 let b = _mm_setr_pd(-1., -1.);
4400 let r = _mm_testnzc_pd(a, b);
4401 assert_eq!(r, 1);
4402 }
4403
4404 #[simd_test(enable = "avx")]
4405 unsafe fn test_mm256_testz_ps() {
4406 let a = _mm256_set1_ps(1.);
4407 let r = _mm256_testz_ps(a, a);
4408 assert_eq!(r, 1);
4409 let a = _mm256_set1_ps(-1.);
4410 let r = _mm256_testz_ps(a, a);
4411 assert_eq!(r, 0);
4412 }
4413
4414 #[simd_test(enable = "avx")]
4415 unsafe fn test_mm256_testc_ps() {
4416 let a = _mm256_set1_ps(1.);
4417 let r = _mm256_testc_ps(a, a);
4418 assert_eq!(r, 1);
4419 let b = _mm256_set1_ps(-1.);
4420 let r = _mm256_testc_ps(a, b);
4421 assert_eq!(r, 0);
4422 }
4423
4424 #[simd_test(enable = "avx")]
4425 unsafe fn test_mm256_testnzc_ps() {
4426 let a = _mm256_set1_ps(1.);
4427 let r = _mm256_testnzc_ps(a, a);
4428 assert_eq!(r, 0);
4429 let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
4430 let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
4431 let r = _mm256_testnzc_ps(a, b);
4432 assert_eq!(r, 1);
4433 }
4434
4435 #[simd_test(enable = "avx")]
4436 unsafe fn test_mm_testz_ps() {
4437 let a = _mm_set1_ps(1.);
4438 let r = _mm_testz_ps(a, a);
4439 assert_eq!(r, 1);
4440 let a = _mm_set1_ps(-1.);
4441 let r = _mm_testz_ps(a, a);
4442 assert_eq!(r, 0);
4443 }
4444
4445 #[simd_test(enable = "avx")]
4446 unsafe fn test_mm_testc_ps() {
4447 let a = _mm_set1_ps(1.);
4448 let r = _mm_testc_ps(a, a);
4449 assert_eq!(r, 1);
4450 let b = _mm_set1_ps(-1.);
4451 let r = _mm_testc_ps(a, b);
4452 assert_eq!(r, 0);
4453 }
4454
4455 #[simd_test(enable = "avx")]
4456 unsafe fn test_mm_testnzc_ps() {
4457 let a = _mm_set1_ps(1.);
4458 let r = _mm_testnzc_ps(a, a);
4459 assert_eq!(r, 0);
4460 let a = _mm_setr_ps(1., -1., -1., -1.);
4461 let b = _mm_setr_ps(-1., -1., 1., 1.);
4462 let r = _mm_testnzc_ps(a, b);
4463 assert_eq!(r, 1);
4464 }
4465
4466 #[simd_test(enable = "avx")]
4467 unsafe fn test_mm256_movemask_pd() {
4468 let a = _mm256_setr_pd(1., -2., 3., -4.);
4469 let r = _mm256_movemask_pd(a);
4470 assert_eq!(r, 0xA);
4471 }
4472
4473 #[simd_test(enable = "avx")]
4474 unsafe fn test_mm256_movemask_ps() {
4475 let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
4476 let r = _mm256_movemask_ps(a);
4477 assert_eq!(r, 0xAA);
4478 }
4479
4480 #[simd_test(enable = "avx")]
4481 unsafe fn test_mm256_setzero_pd() {
4482 let r = _mm256_setzero_pd();
4483 assert_eq_m256d(r, _mm256_set1_pd(0.));
4484 }
4485
4486 #[simd_test(enable = "avx")]
4487 unsafe fn test_mm256_setzero_ps() {
4488 let r = _mm256_setzero_ps();
4489 assert_eq_m256(r, _mm256_set1_ps(0.));
4490 }
4491
4492 #[simd_test(enable = "avx")]
4493 unsafe fn test_mm256_setzero_si256() {
4494 let r = _mm256_setzero_si256();
4495 assert_eq_m256i(r, _mm256_set1_epi8(0));
4496 }
4497
4498 #[simd_test(enable = "avx")]
4499 unsafe fn test_mm256_set_pd() {
4500 let r = _mm256_set_pd(1., 2., 3., 4.);
4501 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
4502 }
4503
4504 #[simd_test(enable = "avx")]
4505 unsafe fn test_mm256_set_ps() {
4506 let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4507 assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
4508 }
4509
4510 #[simd_test(enable = "avx")]
4511 unsafe fn test_mm256_set_epi8() {
4512 #[rustfmt::skip]
4513 let r = _mm256_set_epi8(
4514 1, 2, 3, 4, 5, 6, 7, 8,
4515 9, 10, 11, 12, 13, 14, 15, 16,
4516 17, 18, 19, 20, 21, 22, 23, 24,
4517 25, 26, 27, 28, 29, 30, 31, 32,
4518 );
4519 #[rustfmt::skip]
4520 let e = _mm256_setr_epi8(
4521 32, 31, 30, 29, 28, 27, 26, 25,
4522 24, 23, 22, 21, 20, 19, 18, 17,
4523 16, 15, 14, 13, 12, 11, 10, 9,
4524 8, 7, 6, 5, 4, 3, 2, 1
4525 );
4526 assert_eq_m256i(r, e);
4527 }
4528
4529 #[simd_test(enable = "avx")]
4530 unsafe fn test_mm256_set_epi16() {
4531 #[rustfmt::skip]
4532 let r = _mm256_set_epi16(
4533 1, 2, 3, 4, 5, 6, 7, 8,
4534 9, 10, 11, 12, 13, 14, 15, 16,
4535 );
4536 #[rustfmt::skip]
4537 let e = _mm256_setr_epi16(
4538 16, 15, 14, 13, 12, 11, 10, 9,
4539 8, 7, 6, 5, 4, 3, 2, 1,
4540 );
4541 assert_eq_m256i(r, e);
4542 }
4543
4544 #[simd_test(enable = "avx")]
4545 unsafe fn test_mm256_set_epi32() {
4546 let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4547 assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
4548 }
4549
4550 #[simd_test(enable = "avx")]
4551 unsafe fn test_mm256_set_epi64x() {
4552 let r = _mm256_set_epi64x(1, 2, 3, 4);
4553 assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
4554 }
4555
4556 #[simd_test(enable = "avx")]
4557 unsafe fn test_mm256_setr_pd() {
4558 let r = _mm256_setr_pd(1., 2., 3., 4.);
4559 assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
4560 }
4561
4562 #[simd_test(enable = "avx")]
4563 unsafe fn test_mm256_setr_ps() {
4564 let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4565 assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
4566 }
4567
4568 #[simd_test(enable = "avx")]
4569 unsafe fn test_mm256_setr_epi8() {
4570 #[rustfmt::skip]
4571 let r = _mm256_setr_epi8(
4572 1, 2, 3, 4, 5, 6, 7, 8,
4573 9, 10, 11, 12, 13, 14, 15, 16,
4574 17, 18, 19, 20, 21, 22, 23, 24,
4575 25, 26, 27, 28, 29, 30, 31, 32,
4576 );
4577 #[rustfmt::skip]
4578 let e = _mm256_setr_epi8(
4579 1, 2, 3, 4, 5, 6, 7, 8,
4580 9, 10, 11, 12, 13, 14, 15, 16,
4581 17, 18, 19, 20, 21, 22, 23, 24,
4582 25, 26, 27, 28, 29, 30, 31, 32
4583 );
4584
4585 assert_eq_m256i(r, e);
4586 }
4587
4588 #[simd_test(enable = "avx")]
4589 unsafe fn test_mm256_setr_epi16() {
4590 #[rustfmt::skip]
4591 let r = _mm256_setr_epi16(
4592 1, 2, 3, 4, 5, 6, 7, 8,
4593 9, 10, 11, 12, 13, 14, 15, 16,
4594 );
4595 #[rustfmt::skip]
4596 let e = _mm256_setr_epi16(
4597 1, 2, 3, 4, 5, 6, 7, 8,
4598 9, 10, 11, 12, 13, 14, 15, 16,
4599 );
4600 assert_eq_m256i(r, e);
4601 }
4602
4603 #[simd_test(enable = "avx")]
4604 unsafe fn test_mm256_setr_epi32() {
4605 let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4606 assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
4607 }
4608
4609 #[simd_test(enable = "avx")]
4610 unsafe fn test_mm256_setr_epi64x() {
4611 let r = _mm256_setr_epi64x(1, 2, 3, 4);
4612 assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
4613 }
4614
4615 #[simd_test(enable = "avx")]
4616 unsafe fn test_mm256_set1_pd() {
4617 let r = _mm256_set1_pd(1.);
4618 assert_eq_m256d(r, _mm256_set1_pd(1.));
4619 }
4620
4621 #[simd_test(enable = "avx")]
4622 unsafe fn test_mm256_set1_ps() {
4623 let r = _mm256_set1_ps(1.);
4624 assert_eq_m256(r, _mm256_set1_ps(1.));
4625 }
4626
4627 #[simd_test(enable = "avx")]
4628 unsafe fn test_mm256_set1_epi8() {
4629 let r = _mm256_set1_epi8(1);
4630 assert_eq_m256i(r, _mm256_set1_epi8(1));
4631 }
4632
4633 #[simd_test(enable = "avx")]
4634 unsafe fn test_mm256_set1_epi16() {
4635 let r = _mm256_set1_epi16(1);
4636 assert_eq_m256i(r, _mm256_set1_epi16(1));
4637 }
4638
4639 #[simd_test(enable = "avx")]
4640 unsafe fn test_mm256_set1_epi32() {
4641 let r = _mm256_set1_epi32(1);
4642 assert_eq_m256i(r, _mm256_set1_epi32(1));
4643 }
4644
4645 #[simd_test(enable = "avx")]
4646 unsafe fn test_mm256_set1_epi64x() {
4647 let r = _mm256_set1_epi64x(1);
4648 assert_eq_m256i(r, _mm256_set1_epi64x(1));
4649 }
4650
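    // The casts below are pure bit reinterpretations. For example, 1.0f64 is
    // 0x3FF0_0000_0000_0000, whose two 32-bit halves read back as the f32
    // values 0.0 (0x0000_0000) and 1.875 (0x3FF0_0000).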
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_ps() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_ps(a);
        let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_pd() {
        let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        let r = _mm256_castps_pd(a);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

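    // The epi8 pattern is the little-endian byte sequence of the f32 values,
    // e.g. 1.0f32 = 0x3F80_0000 becomes the bytes 0, 0, -128 (0x80), 63 (0x3F).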
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_si256() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps_si256(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_ps() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        let r = _mm256_castsi256_ps(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_si256() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_si256(a);
        assert_eq_m256d(transmute(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_pd() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_pd(a);
        assert_eq_m256d(r, transmute(a));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps256_ps128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps256_ps128(a);
        assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd256_pd128() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd256_pd128(a);
        assert_eq_m128d(r, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_si128() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_si128(a);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_castps128_ps256(a);
        assert_eq_m128(_mm256_castps256_ps128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_castpd128_pd256(a);
        assert_eq_m128d(_mm256_castpd256_pd128(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi128_si256() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm256_castsi128_si256(a);
        assert_eq_m128i(_mm256_castsi256_si128(r), a);
    }

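    // The upper half of a 128-to-256 cast is undefined, so the tests above
    // only check the low 128 bits. The zext intrinsics below instead
    // guarantee a zeroed upper half.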
4750
4751 #[simd_test(enable = "avx")]
4752 unsafe fn test_mm256_zextps128_ps256() {
4753 let a = _mm_setr_ps(1., 2., 3., 4.);
4754 let r = _mm256_zextps128_ps256(a);
4755 let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
4756 assert_eq_m256(r, e);
4757 }
4758
4759 #[simd_test(enable = "avx")]
4760 unsafe fn test_mm256_zextsi128_si256() {
4761 let a = _mm_setr_epi64x(1, 2);
4762 let r = _mm256_zextsi128_si256(a);
4763 let e = _mm256_setr_epi64x(1, 2, 0, 0);
4764 assert_eq_m256i(r, e);
4765 }
4766
4767 #[simd_test(enable = "avx")]
4768 unsafe fn test_mm256_zextpd128_pd256() {
4769 let a = _mm_setr_pd(1., 2.);
4770 let r = _mm256_zextpd128_pd256(a);
4771 let e = _mm256_setr_pd(1., 2., 0., 0.);
4772 assert_eq_m256d(r, e);
4773 }
4774
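    // set_m128 takes its halves as (hi, lo) while setr_m128 takes (lo, hi);
    // in both cases `lo` ends up in the lower 128 bits of the result.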
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128() {
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_set_m128(hi, lo);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128d() {
        let hi = _mm_setr_pd(3., 4.);
        let lo = _mm_setr_pd(1., 2.);
        let r = _mm256_set_m128d(hi, lo);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_set_m128i(hi, lo);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let r = _mm256_setr_m128(lo, hi);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128d() {
        let lo = _mm_setr_pd(1., 2.);
        let hi = _mm_setr_pd(3., 4.);
        let r = _mm256_setr_m128d(lo, hi);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128i() {
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_setr_m128i(lo, hi);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

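    // loadu2 composes a 256-bit value from two independent, possibly
    // unaligned 128-bit loads; the first pointer fills the upper half.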
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128() {
        let hi = &[5., 6., 7., 8.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2., 3., 4.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128(hiaddr, loaddr);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128d() {
        let hi = &[3., 4.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

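    // storeu2 is the inverse of loadu2: it writes the upper and lower 128-bit
    // halves of `a` to two independent, possibly unaligned destinations.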
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut hi = _mm_undefined_ps();
        let mut lo = _mm_undefined_ps();
        _mm256_storeu2_m128(
            ptr::addr_of_mut!(hi) as *mut f32,
            ptr::addr_of_mut!(lo) as *mut f32,
            a,
        );
        assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128d() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut hi = _mm_undefined_pd();
        let mut lo = _mm_undefined_pd();
        _mm256_storeu2_m128d(
            ptr::addr_of_mut!(hi) as *mut f64,
            ptr::addr_of_mut!(lo) as *mut f64,
            a,
        );
        assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
        assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128i() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let mut hi = _mm_undefined_si128();
        let mut lo = _mm_undefined_si128();
        _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
        #[rustfmt::skip]
        let e_hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e_lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );

        assert_eq_m128i(hi, e_hi);
        assert_eq_m128i(lo, e_lo);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtss_f32() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_cvtss_f32(a);
        assert_eq!(r, 1.);
    }
}