1use crate::core_arch::{simd::*, x86::*};
4use crate::intrinsics::simd::*;
5
6#[cfg(test)]
7use stdarch_test::assert_instr;
8
9#[stable(feature = "simd_x86", since = "1.27.0")]
12pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
13#[stable(feature = "simd_x86", since = "1.27.0")]
15pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
16#[stable(feature = "simd_x86", since = "1.27.0")]
18pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
19#[stable(feature = "simd_x86", since = "1.27.0")]
21pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
22#[stable(feature = "simd_x86", since = "1.27.0")]
24pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
25#[stable(feature = "simd_x86", since = "1.27.0")]
27pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
28#[stable(feature = "simd_x86", since = "1.27.0")]
30pub const _MM_FROUND_NO_EXC: i32 = 0x08;
31#[stable(feature = "simd_x86", since = "1.27.0")]
33pub const _MM_FROUND_NINT: i32 = 0x00;
34#[stable(feature = "simd_x86", since = "1.27.0")]
36pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
37#[stable(feature = "simd_x86", since = "1.27.0")]
39pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
40#[stable(feature = "simd_x86", since = "1.27.0")]
42pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
43#[stable(feature = "simd_x86", since = "1.27.0")]
46pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
47#[stable(feature = "simd_x86", since = "1.27.0")]
49pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
50
51#[inline]
59#[target_feature(enable = "sse4.1")]
60#[cfg_attr(test, assert_instr(pblendvb))]
61#[stable(feature = "simd_x86", since = "1.27.0")]
62pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
63 let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
64 transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
65}
66
67#[inline]
75#[target_feature(enable = "sse4.1")]
76#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
77#[rustc_legacy_const_generics(2)]
78#[stable(feature = "simd_x86", since = "1.27.0")]
79pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
80 static_assert_uimm_bits!(IMM8, 8);
81 transmute::<i16x8, _>(simd_shuffle!(
82 a.as_i16x8(),
83 b.as_i16x8(),
84 [
85 [0, 8][IMM8 as usize & 1],
86 [1, 9][(IMM8 >> 1) as usize & 1],
87 [2, 10][(IMM8 >> 2) as usize & 1],
88 [3, 11][(IMM8 >> 3) as usize & 1],
89 [4, 12][(IMM8 >> 4) as usize & 1],
90 [5, 13][(IMM8 >> 5) as usize & 1],
91 [6, 14][(IMM8 >> 6) as usize & 1],
92 [7, 15][(IMM8 >> 7) as usize & 1],
93 ]
94 ))
95}
96
97#[inline]
102#[target_feature(enable = "sse4.1")]
103#[cfg_attr(test, assert_instr(blendvpd))]
104#[stable(feature = "simd_x86", since = "1.27.0")]
105pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
106 let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
107 transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
108}
109
110#[inline]
115#[target_feature(enable = "sse4.1")]
116#[cfg_attr(test, assert_instr(blendvps))]
117#[stable(feature = "simd_x86", since = "1.27.0")]
118pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
119 let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
120 transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
121}
122
123#[inline]
128#[target_feature(enable = "sse4.1")]
129#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
133#[rustc_legacy_const_generics(2)]
134#[stable(feature = "simd_x86", since = "1.27.0")]
135pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
136 static_assert_uimm_bits!(IMM2, 2);
137 transmute::<f64x2, _>(simd_shuffle!(
138 a.as_f64x2(),
139 b.as_f64x2(),
140 [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
141 ))
142}
143
144#[inline]
149#[target_feature(enable = "sse4.1")]
150#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
151#[rustc_legacy_const_generics(2)]
152#[stable(feature = "simd_x86", since = "1.27.0")]
153pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
154 static_assert_uimm_bits!(IMM4, 4);
155 transmute::<f32x4, _>(simd_shuffle!(
156 a.as_f32x4(),
157 b.as_f32x4(),
158 [
159 [0, 4][IMM4 as usize & 1],
160 [1, 5][(IMM4 >> 1) as usize & 1],
161 [2, 6][(IMM4 >> 2) as usize & 1],
162 [3, 7][(IMM4 >> 3) as usize & 1],
163 ]
164 ))
165}
166
167#[inline]
193#[target_feature(enable = "sse4.1")]
194#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 0))]
195#[rustc_legacy_const_generics(1)]
196#[stable(feature = "simd_x86", since = "1.27.0")]
197pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
198 static_assert_uimm_bits!(IMM8, 2);
199 simd_extract!(a, IMM8 as u32, f32).to_bits() as i32
200}
201
202#[inline]
209#[target_feature(enable = "sse4.1")]
210#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
211#[rustc_legacy_const_generics(1)]
212#[stable(feature = "simd_x86", since = "1.27.0")]
213pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
214 static_assert_uimm_bits!(IMM8, 4);
215 simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32
216}
217
218#[inline]
222#[target_feature(enable = "sse4.1")]
223#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(extractps, IMM8 = 1))]
224#[rustc_legacy_const_generics(1)]
225#[stable(feature = "simd_x86", since = "1.27.0")]
226pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
227 static_assert_uimm_bits!(IMM8, 2);
228 simd_extract!(a.as_i32x4(), IMM8 as u32, i32)
229}
230
231#[inline]
256#[target_feature(enable = "sse4.1")]
257#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
258#[rustc_legacy_const_generics(2)]
259#[stable(feature = "simd_x86", since = "1.27.0")]
260pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
261 static_assert_uimm_bits!(IMM8, 8);
262 insertps(a, b, IMM8 as u8)
263}
264
265#[inline]
270#[target_feature(enable = "sse4.1")]
271#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
272#[rustc_legacy_const_generics(2)]
273#[stable(feature = "simd_x86", since = "1.27.0")]
274pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
275 static_assert_uimm_bits!(IMM8, 4);
276 transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8))
277}
278
279#[inline]
284#[target_feature(enable = "sse4.1")]
285#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
286#[rustc_legacy_const_generics(2)]
287#[stable(feature = "simd_x86", since = "1.27.0")]
288pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
289 static_assert_uimm_bits!(IMM8, 2);
290 transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i))
291}
292
293#[inline]
298#[target_feature(enable = "sse4.1")]
299#[cfg_attr(test, assert_instr(pmaxsb))]
300#[stable(feature = "simd_x86", since = "1.27.0")]
301pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
302 let a = a.as_i8x16();
303 let b = b.as_i8x16();
304 transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
305}
306
307#[inline]
312#[target_feature(enable = "sse4.1")]
313#[cfg_attr(test, assert_instr(pmaxuw))]
314#[stable(feature = "simd_x86", since = "1.27.0")]
315pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
316 let a = a.as_u16x8();
317 let b = b.as_u16x8();
318 transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
319}
320
321#[inline]
326#[target_feature(enable = "sse4.1")]
327#[cfg_attr(test, assert_instr(pmaxsd))]
328#[stable(feature = "simd_x86", since = "1.27.0")]
329pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
330 let a = a.as_i32x4();
331 let b = b.as_i32x4();
332 transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
333}
334
335#[inline]
340#[target_feature(enable = "sse4.1")]
341#[cfg_attr(test, assert_instr(pmaxud))]
342#[stable(feature = "simd_x86", since = "1.27.0")]
343pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
344 let a = a.as_u32x4();
345 let b = b.as_u32x4();
346 transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
347}
348
349#[inline]
354#[target_feature(enable = "sse4.1")]
355#[cfg_attr(test, assert_instr(pminsb))]
356#[stable(feature = "simd_x86", since = "1.27.0")]
357pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
358 let a = a.as_i8x16();
359 let b = b.as_i8x16();
360 transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
361}
362
363#[inline]
368#[target_feature(enable = "sse4.1")]
369#[cfg_attr(test, assert_instr(pminuw))]
370#[stable(feature = "simd_x86", since = "1.27.0")]
371pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
372 let a = a.as_u16x8();
373 let b = b.as_u16x8();
374 transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
375}
376
377#[inline]
382#[target_feature(enable = "sse4.1")]
383#[cfg_attr(test, assert_instr(pminsd))]
384#[stable(feature = "simd_x86", since = "1.27.0")]
385pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
386 let a = a.as_i32x4();
387 let b = b.as_i32x4();
388 transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
389}
390
391#[inline]
396#[target_feature(enable = "sse4.1")]
397#[cfg_attr(test, assert_instr(pminud))]
398#[stable(feature = "simd_x86", since = "1.27.0")]
399pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
400 let a = a.as_u32x4();
401 let b = b.as_u32x4();
402 transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
403}
404
405#[inline]
410#[target_feature(enable = "sse4.1")]
411#[cfg_attr(test, assert_instr(packusdw))]
412#[stable(feature = "simd_x86", since = "1.27.0")]
413pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
414 transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
415}
416
417#[inline]
421#[target_feature(enable = "sse4.1")]
422#[cfg_attr(test, assert_instr(pcmpeqq))]
423#[stable(feature = "simd_x86", since = "1.27.0")]
424pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
425 transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
426}
427
428#[inline]
432#[target_feature(enable = "sse4.1")]
433#[cfg_attr(test, assert_instr(pmovsxbw))]
434#[stable(feature = "simd_x86", since = "1.27.0")]
435pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
436 let a = a.as_i8x16();
437 let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
438 transmute(simd_cast::<_, i16x8>(a))
439}
440
441#[inline]
445#[target_feature(enable = "sse4.1")]
446#[cfg_attr(test, assert_instr(pmovsxbd))]
447#[stable(feature = "simd_x86", since = "1.27.0")]
448pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
449 let a = a.as_i8x16();
450 let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
451 transmute(simd_cast::<_, i32x4>(a))
452}
453
454#[inline]
459#[target_feature(enable = "sse4.1")]
460#[cfg_attr(test, assert_instr(pmovsxbq))]
461#[stable(feature = "simd_x86", since = "1.27.0")]
462pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
463 let a = a.as_i8x16();
464 let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
465 transmute(simd_cast::<_, i64x2>(a))
466}
467
468#[inline]
472#[target_feature(enable = "sse4.1")]
473#[cfg_attr(test, assert_instr(pmovsxwd))]
474#[stable(feature = "simd_x86", since = "1.27.0")]
475pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
476 let a = a.as_i16x8();
477 let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
478 transmute(simd_cast::<_, i32x4>(a))
479}
480
481#[inline]
485#[target_feature(enable = "sse4.1")]
486#[cfg_attr(test, assert_instr(pmovsxwq))]
487#[stable(feature = "simd_x86", since = "1.27.0")]
488pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
489 let a = a.as_i16x8();
490 let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
491 transmute(simd_cast::<_, i64x2>(a))
492}
493
494#[inline]
498#[target_feature(enable = "sse4.1")]
499#[cfg_attr(test, assert_instr(pmovsxdq))]
500#[stable(feature = "simd_x86", since = "1.27.0")]
501pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
502 let a = a.as_i32x4();
503 let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
504 transmute(simd_cast::<_, i64x2>(a))
505}
506
507#[inline]
511#[target_feature(enable = "sse4.1")]
512#[cfg_attr(test, assert_instr(pmovzxbw))]
513#[stable(feature = "simd_x86", since = "1.27.0")]
514pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
515 let a = a.as_u8x16();
516 let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
517 transmute(simd_cast::<_, i16x8>(a))
518}
519
520#[inline]
524#[target_feature(enable = "sse4.1")]
525#[cfg_attr(test, assert_instr(pmovzxbd))]
526#[stable(feature = "simd_x86", since = "1.27.0")]
527pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
528 let a = a.as_u8x16();
529 let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
530 transmute(simd_cast::<_, i32x4>(a))
531}
532
533#[inline]
537#[target_feature(enable = "sse4.1")]
538#[cfg_attr(test, assert_instr(pmovzxbq))]
539#[stable(feature = "simd_x86", since = "1.27.0")]
540pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
541 let a = a.as_u8x16();
542 let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
543 transmute(simd_cast::<_, i64x2>(a))
544}
545
546#[inline]
551#[target_feature(enable = "sse4.1")]
552#[cfg_attr(test, assert_instr(pmovzxwd))]
553#[stable(feature = "simd_x86", since = "1.27.0")]
554pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
555 let a = a.as_u16x8();
556 let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
557 transmute(simd_cast::<_, i32x4>(a))
558}
559
560#[inline]
565#[target_feature(enable = "sse4.1")]
566#[cfg_attr(test, assert_instr(pmovzxwq))]
567#[stable(feature = "simd_x86", since = "1.27.0")]
568pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
569 let a = a.as_u16x8();
570 let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
571 transmute(simd_cast::<_, i64x2>(a))
572}
573
574#[inline]
579#[target_feature(enable = "sse4.1")]
580#[cfg_attr(test, assert_instr(pmovzxdq))]
581#[stable(feature = "simd_x86", since = "1.27.0")]
582pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
583 let a = a.as_u32x4();
584 let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
585 transmute(simd_cast::<_, i64x2>(a))
586}
587
588#[inline]
598#[target_feature(enable = "sse4.1")]
599#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
600#[rustc_legacy_const_generics(2)]
601#[stable(feature = "simd_x86", since = "1.27.0")]
602pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
603 static_assert_uimm_bits!(IMM8, 8);
604 dppd(a, b, IMM8 as u8)
605}
606
607#[inline]
617#[target_feature(enable = "sse4.1")]
618#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
619#[rustc_legacy_const_generics(2)]
620#[stable(feature = "simd_x86", since = "1.27.0")]
621pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
622 static_assert_uimm_bits!(IMM8, 8);
623 dpps(a, b, IMM8 as u8)
624}
625
626#[inline]
632#[target_feature(enable = "sse4.1")]
633#[cfg_attr(test, assert_instr(roundpd))]
634#[stable(feature = "simd_x86", since = "1.27.0")]
635pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
636 simd_floor(a)
637}
638
639#[inline]
645#[target_feature(enable = "sse4.1")]
646#[cfg_attr(test, assert_instr(roundps))]
647#[stable(feature = "simd_x86", since = "1.27.0")]
648pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
649 simd_floor(a)
650}
651
652#[inline]
660#[target_feature(enable = "sse4.1")]
661#[cfg_attr(test, assert_instr(roundsd))]
662#[stable(feature = "simd_x86", since = "1.27.0")]
663pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
664 roundsd(a, b, _MM_FROUND_FLOOR)
665}
666
667#[inline]
675#[target_feature(enable = "sse4.1")]
676#[cfg_attr(test, assert_instr(roundss))]
677#[stable(feature = "simd_x86", since = "1.27.0")]
678pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
679 roundss(a, b, _MM_FROUND_FLOOR)
680}
681
682#[inline]
688#[target_feature(enable = "sse4.1")]
689#[cfg_attr(test, assert_instr(roundpd))]
690#[stable(feature = "simd_x86", since = "1.27.0")]
691pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
692 simd_ceil(a)
693}
694
695#[inline]
701#[target_feature(enable = "sse4.1")]
702#[cfg_attr(test, assert_instr(roundps))]
703#[stable(feature = "simd_x86", since = "1.27.0")]
704pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
705 simd_ceil(a)
706}
707
708#[inline]
716#[target_feature(enable = "sse4.1")]
717#[cfg_attr(test, assert_instr(roundsd))]
718#[stable(feature = "simd_x86", since = "1.27.0")]
719pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
720 roundsd(a, b, _MM_FROUND_CEIL)
721}
722
723#[inline]
731#[target_feature(enable = "sse4.1")]
732#[cfg_attr(test, assert_instr(roundss))]
733#[stable(feature = "simd_x86", since = "1.27.0")]
734pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
735 roundss(a, b, _MM_FROUND_CEIL)
736}
737
738#[inline]
751#[target_feature(enable = "sse4.1")]
752#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
753#[rustc_legacy_const_generics(1)]
754#[stable(feature = "simd_x86", since = "1.27.0")]
755pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
756 static_assert_uimm_bits!(ROUNDING, 4);
757 roundpd(a, ROUNDING)
758}
759
760#[inline]
773#[target_feature(enable = "sse4.1")]
774#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
775#[rustc_legacy_const_generics(1)]
776#[stable(feature = "simd_x86", since = "1.27.0")]
777pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
778 static_assert_uimm_bits!(ROUNDING, 4);
779 roundps(a, ROUNDING)
780}
781
782#[inline]
797#[target_feature(enable = "sse4.1")]
798#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
799#[rustc_legacy_const_generics(2)]
800#[stable(feature = "simd_x86", since = "1.27.0")]
801pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
802 static_assert_uimm_bits!(ROUNDING, 4);
803 roundsd(a, b, ROUNDING)
804}
805
806#[inline]
821#[target_feature(enable = "sse4.1")]
822#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
823#[rustc_legacy_const_generics(2)]
824#[stable(feature = "simd_x86", since = "1.27.0")]
825pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
826 static_assert_uimm_bits!(ROUNDING, 4);
827 roundss(a, b, ROUNDING)
828}
829
830#[inline]
852#[target_feature(enable = "sse4.1")]
853#[cfg_attr(test, assert_instr(phminposuw))]
854#[stable(feature = "simd_x86", since = "1.27.0")]
855pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
856 transmute(phminposuw(a.as_u16x8()))
857}
858
859#[inline]
864#[target_feature(enable = "sse4.1")]
865#[cfg_attr(test, assert_instr(pmuldq))]
866#[stable(feature = "simd_x86", since = "1.27.0")]
867pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
868 let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
869 let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
870 transmute(simd_mul(a, b))
871}
872
873#[inline]
882#[target_feature(enable = "sse4.1")]
883#[cfg_attr(test, assert_instr(pmulld))]
884#[stable(feature = "simd_x86", since = "1.27.0")]
885pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
886 transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
887}
888
889#[inline]
923#[target_feature(enable = "sse4.1")]
924#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
925#[rustc_legacy_const_generics(2)]
926#[stable(feature = "simd_x86", since = "1.27.0")]
927pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
928 static_assert_uimm_bits!(IMM8, 3);
929 transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8))
930}
931
932#[inline]
948#[target_feature(enable = "sse4.1")]
949#[cfg_attr(test, assert_instr(ptest))]
950#[stable(feature = "simd_x86", since = "1.27.0")]
951pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
952 ptestz(a.as_i64x2(), mask.as_i64x2())
953}
954
955#[inline]
971#[target_feature(enable = "sse4.1")]
972#[cfg_attr(test, assert_instr(ptest))]
973#[stable(feature = "simd_x86", since = "1.27.0")]
974pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
975 ptestc(a.as_i64x2(), mask.as_i64x2())
976}
977
978#[inline]
994#[target_feature(enable = "sse4.1")]
995#[cfg_attr(test, assert_instr(ptest))]
996#[stable(feature = "simd_x86", since = "1.27.0")]
997pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
998 ptestnzc(a.as_i64x2(), mask.as_i64x2())
999}
1000
1001#[inline]
1017#[target_feature(enable = "sse4.1")]
1018#[cfg_attr(test, assert_instr(ptest))]
1019#[stable(feature = "simd_x86", since = "1.27.0")]
1020pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
1021 _mm_testz_si128(a, mask)
1022}
1023
1024#[inline]
1038#[target_feature(enable = "sse4.1")]
1039#[cfg_attr(test, assert_instr(pcmpeqd))]
1040#[cfg_attr(test, assert_instr(ptest))]
1041#[stable(feature = "simd_x86", since = "1.27.0")]
1042pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
1043 _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
1044}
1045
1046#[inline]
1062#[target_feature(enable = "sse4.1")]
1063#[cfg_attr(test, assert_instr(ptest))]
1064#[stable(feature = "simd_x86", since = "1.27.0")]
1065pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
1066 _mm_testnzc_si128(a, mask)
1067}
1068
1069#[inline]
1075#[target_feature(enable = "sse4.1")]
1076#[cfg_attr(test, assert_instr(movntdqa))]
1077#[stable(feature = "simd_x86_updates", since = "1.82.0")]
1078pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
1079 let dst: __m128i;
1080 crate::arch::asm!(
1081 vpl!("movntdqa {a}"),
1082 a = out(xmm_reg) dst,
1083 p = in(reg) mem_addr,
1084 options(pure, readonly, nostack, preserves_flags),
1085 );
1086 dst
1087}
1088
1089#[allow(improper_ctypes)]
1090extern "C" {
1091 #[link_name = "llvm.x86.sse41.insertps"]
1092 fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
1093 #[link_name = "llvm.x86.sse41.packusdw"]
1094 fn packusdw(a: i32x4, b: i32x4) -> u16x8;
1095 #[link_name = "llvm.x86.sse41.dppd"]
1096 fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
1097 #[link_name = "llvm.x86.sse41.dpps"]
1098 fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
1099 #[link_name = "llvm.x86.sse41.round.pd"]
1100 fn roundpd(a: __m128d, rounding: i32) -> __m128d;
1101 #[link_name = "llvm.x86.sse41.round.ps"]
1102 fn roundps(a: __m128, rounding: i32) -> __m128;
1103 #[link_name = "llvm.x86.sse41.round.sd"]
1104 fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
1105 #[link_name = "llvm.x86.sse41.round.ss"]
1106 fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
1107 #[link_name = "llvm.x86.sse41.phminposuw"]
1108 fn phminposuw(a: u16x8) -> u16x8;
1109 #[link_name = "llvm.x86.sse41.mpsadbw"]
1110 fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
1111 #[link_name = "llvm.x86.sse41.ptestz"]
1112 fn ptestz(a: i64x2, mask: i64x2) -> i32;
1113 #[link_name = "llvm.x86.sse41.ptestc"]
1114 fn ptestc(a: i64x2, mask: i64x2) -> i32;
1115 #[link_name = "llvm.x86.sse41.ptestnzc"]
1116 fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
1117}
1118
1119#[cfg(test)]
1120mod tests {
1121 use crate::core_arch::x86::*;
1122 use std::mem;
1123 use stdarch_test::simd_test;
1124
1125 #[simd_test(enable = "sse4.1")]
1126 unsafe fn test_mm_blendv_epi8() {
1127 #[rustfmt::skip]
1128 let a = _mm_setr_epi8(
1129 0, 1, 2, 3, 4, 5, 6, 7,
1130 8, 9, 10, 11, 12, 13, 14, 15,
1131 );
1132 #[rustfmt::skip]
1133 let b = _mm_setr_epi8(
1134 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
1135 );
1136 #[rustfmt::skip]
1137 let mask = _mm_setr_epi8(
1138 0, -1, 0, -1, 0, -1, 0, -1,
1139 0, -1, 0, -1, 0, -1, 0, -1,
1140 );
1141 #[rustfmt::skip]
1142 let e = _mm_setr_epi8(
1143 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
1144 );
1145 assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
1146 }
1147
1148 #[simd_test(enable = "sse4.1")]
1149 unsafe fn test_mm_blendv_pd() {
1150 let a = _mm_set1_pd(0.0);
1151 let b = _mm_set1_pd(1.0);
1152 let mask = transmute(_mm_setr_epi64x(0, -1));
1153 let r = _mm_blendv_pd(a, b, mask);
1154 let e = _mm_setr_pd(0.0, 1.0);
1155 assert_eq_m128d(r, e);
1156 }
1157
1158 #[simd_test(enable = "sse4.1")]
1159 unsafe fn test_mm_blendv_ps() {
1160 let a = _mm_set1_ps(0.0);
1161 let b = _mm_set1_ps(1.0);
1162 let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
1163 let r = _mm_blendv_ps(a, b, mask);
1164 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1165 assert_eq_m128(r, e);
1166 }
1167
1168 #[simd_test(enable = "sse4.1")]
1169 unsafe fn test_mm_blend_pd() {
1170 let a = _mm_set1_pd(0.0);
1171 let b = _mm_set1_pd(1.0);
1172 let r = _mm_blend_pd::<0b10>(a, b);
1173 let e = _mm_setr_pd(0.0, 1.0);
1174 assert_eq_m128d(r, e);
1175 }
1176
1177 #[simd_test(enable = "sse4.1")]
1178 unsafe fn test_mm_blend_ps() {
1179 let a = _mm_set1_ps(0.0);
1180 let b = _mm_set1_ps(1.0);
1181 let r = _mm_blend_ps::<0b1010>(a, b);
1182 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1183 assert_eq_m128(r, e);
1184 }
1185
1186 #[simd_test(enable = "sse4.1")]
1187 unsafe fn test_mm_blend_epi16() {
1188 let a = _mm_set1_epi16(0);
1189 let b = _mm_set1_epi16(1);
1190 let r = _mm_blend_epi16::<0b1010_1100>(a, b);
1191 let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
1192 assert_eq_m128i(r, e);
1193 }
1194
1195 #[simd_test(enable = "sse4.1")]
1196 unsafe fn test_mm_extract_ps() {
1197 let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
1198 let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
1199 assert_eq!(r, 1.0);
1200 let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
1201 assert_eq!(r, 3.0);
1202 }
1203
1204 #[simd_test(enable = "sse4.1")]
1205 unsafe fn test_mm_extract_epi8() {
1206 #[rustfmt::skip]
1207 let a = _mm_setr_epi8(
1208 -1, 1, 2, 3, 4, 5, 6, 7,
1209 8, 9, 10, 11, 12, 13, 14, 15
1210 );
1211 let r1 = _mm_extract_epi8::<0>(a);
1212 let r2 = _mm_extract_epi8::<3>(a);
1213 assert_eq!(r1, 0xFF);
1214 assert_eq!(r2, 3);
1215 }
1216
1217 #[simd_test(enable = "sse4.1")]
1218 unsafe fn test_mm_extract_epi32() {
1219 let a = _mm_setr_epi32(0, 1, 2, 3);
1220 let r = _mm_extract_epi32::<1>(a);
1221 assert_eq!(r, 1);
1222 let r = _mm_extract_epi32::<3>(a);
1223 assert_eq!(r, 3);
1224 }
1225
1226 #[simd_test(enable = "sse4.1")]
1227 unsafe fn test_mm_insert_ps() {
1228 let a = _mm_set1_ps(1.0);
1229 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1230 let r = _mm_insert_ps::<0b11_00_1100>(a, b);
1231 let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
1232 assert_eq_m128(r, e);
1233
1234 let a = _mm_set1_ps(1.0);
1236 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1237 let r = _mm_insert_ps::<0b11_00_0001>(a, b);
1238 let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
1239 assert_eq_m128(r, e);
1240 }
1241
1242 #[simd_test(enable = "sse4.1")]
1243 unsafe fn test_mm_insert_epi8() {
1244 let a = _mm_set1_epi8(0);
1245 let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1246 let r = _mm_insert_epi8::<1>(a, 32);
1247 assert_eq_m128i(r, e);
1248 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
1249 let r = _mm_insert_epi8::<14>(a, 32);
1250 assert_eq_m128i(r, e);
1251 }
1252
1253 #[simd_test(enable = "sse4.1")]
1254 unsafe fn test_mm_insert_epi32() {
1255 let a = _mm_set1_epi32(0);
1256 let e = _mm_setr_epi32(0, 32, 0, 0);
1257 let r = _mm_insert_epi32::<1>(a, 32);
1258 assert_eq_m128i(r, e);
1259 let e = _mm_setr_epi32(0, 0, 0, 32);
1260 let r = _mm_insert_epi32::<3>(a, 32);
1261 assert_eq_m128i(r, e);
1262 }
1263
1264 #[simd_test(enable = "sse4.1")]
1265 unsafe fn test_mm_max_epi8() {
1266 #[rustfmt::skip]
1267 let a = _mm_setr_epi8(
1268 1, 4, 5, 8, 9, 12, 13, 16,
1269 17, 20, 21, 24, 25, 28, 29, 32,
1270 );
1271 #[rustfmt::skip]
1272 let b = _mm_setr_epi8(
1273 2, 3, 6, 7, 10, 11, 14, 15,
1274 18, 19, 22, 23, 26, 27, 30, 31,
1275 );
1276 let r = _mm_max_epi8(a, b);
1277 #[rustfmt::skip]
1278 let e = _mm_setr_epi8(
1279 2, 4, 6, 8, 10, 12, 14, 16,
1280 18, 20, 22, 24, 26, 28, 30, 32,
1281 );
1282 assert_eq_m128i(r, e);
1283 }
1284
1285 #[simd_test(enable = "sse4.1")]
1286 unsafe fn test_mm_max_epu16() {
1287 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1288 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1289 let r = _mm_max_epu16(a, b);
1290 let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
1291 assert_eq_m128i(r, e);
1292 }
1293
1294 #[simd_test(enable = "sse4.1")]
1295 unsafe fn test_mm_max_epi32() {
1296 let a = _mm_setr_epi32(1, 4, 5, 8);
1297 let b = _mm_setr_epi32(2, 3, 6, 7);
1298 let r = _mm_max_epi32(a, b);
1299 let e = _mm_setr_epi32(2, 4, 6, 8);
1300 assert_eq_m128i(r, e);
1301 }
1302
1303 #[simd_test(enable = "sse4.1")]
1304 unsafe fn test_mm_max_epu32() {
1305 let a = _mm_setr_epi32(1, 4, 5, 8);
1306 let b = _mm_setr_epi32(2, 3, 6, 7);
1307 let r = _mm_max_epu32(a, b);
1308 let e = _mm_setr_epi32(2, 4, 6, 8);
1309 assert_eq_m128i(r, e);
1310 }
1311
1312 #[simd_test(enable = "sse4.1")]
1313 unsafe fn test_mm_min_epi8_1() {
1314 #[rustfmt::skip]
1315 let a = _mm_setr_epi8(
1316 1, 4, 5, 8, 9, 12, 13, 16,
1317 17, 20, 21, 24, 25, 28, 29, 32,
1318 );
1319 #[rustfmt::skip]
1320 let b = _mm_setr_epi8(
1321 2, 3, 6, 7, 10, 11, 14, 15,
1322 18, 19, 22, 23, 26, 27, 30, 31,
1323 );
1324 let r = _mm_min_epi8(a, b);
1325 #[rustfmt::skip]
1326 let e = _mm_setr_epi8(
1327 1, 3, 5, 7, 9, 11, 13, 15,
1328 17, 19, 21, 23, 25, 27, 29, 31,
1329 );
1330 assert_eq_m128i(r, e);
1331 }
1332
1333 #[simd_test(enable = "sse4.1")]
1334 unsafe fn test_mm_min_epi8_2() {
1335 #[rustfmt::skip]
1336 let a = _mm_setr_epi8(
1337 1, -4, -5, 8, -9, -12, 13, -16,
1338 17, 20, 21, 24, 25, 28, 29, 32,
1339 );
1340 #[rustfmt::skip]
1341 let b = _mm_setr_epi8(
1342 2, -3, -6, 7, -10, -11, 14, -15,
1343 18, 19, 22, 23, 26, 27, 30, 31,
1344 );
1345 let r = _mm_min_epi8(a, b);
1346 #[rustfmt::skip]
1347 let e = _mm_setr_epi8(
1348 1, -4, -6, 7, -10, -12, 13, -16,
1349 17, 19, 21, 23, 25, 27, 29, 31,
1350 );
1351 assert_eq_m128i(r, e);
1352 }
1353
1354 #[simd_test(enable = "sse4.1")]
1355 unsafe fn test_mm_min_epu16() {
1356 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1357 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1358 let r = _mm_min_epu16(a, b);
1359 let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
1360 assert_eq_m128i(r, e);
1361 }
1362
1363 #[simd_test(enable = "sse4.1")]
1364 unsafe fn test_mm_min_epi32_1() {
1365 let a = _mm_setr_epi32(1, 4, 5, 8);
1366 let b = _mm_setr_epi32(2, 3, 6, 7);
1367 let r = _mm_min_epi32(a, b);
1368 let e = _mm_setr_epi32(1, 3, 5, 7);
1369 assert_eq_m128i(r, e);
1370 }
1371
1372 #[simd_test(enable = "sse4.1")]
1373 unsafe fn test_mm_min_epi32_2() {
1374 let a = _mm_setr_epi32(-1, 4, 5, -7);
1375 let b = _mm_setr_epi32(-2, 3, -6, 8);
1376 let r = _mm_min_epi32(a, b);
1377 let e = _mm_setr_epi32(-2, 3, -6, -7);
1378 assert_eq_m128i(r, e);
1379 }
1380
1381 #[simd_test(enable = "sse4.1")]
1382 unsafe fn test_mm_min_epu32() {
1383 let a = _mm_setr_epi32(1, 4, 5, 8);
1384 let b = _mm_setr_epi32(2, 3, 6, 7);
1385 let r = _mm_min_epu32(a, b);
1386 let e = _mm_setr_epi32(1, 3, 5, 7);
1387 assert_eq_m128i(r, e);
1388 }
1389
1390 #[simd_test(enable = "sse4.1")]
1391 unsafe fn test_mm_packus_epi32() {
1392 let a = _mm_setr_epi32(1, 2, 3, 4);
1393 let b = _mm_setr_epi32(-1, -2, -3, -4);
1394 let r = _mm_packus_epi32(a, b);
1395 let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
1396 assert_eq_m128i(r, e);
1397 }
1398
1399 #[simd_test(enable = "sse4.1")]
1400 unsafe fn test_mm_cmpeq_epi64() {
1401 let a = _mm_setr_epi64x(0, 1);
1402 let b = _mm_setr_epi64x(0, 0);
1403 let r = _mm_cmpeq_epi64(a, b);
1404 let e = _mm_setr_epi64x(-1, 0);
1405 assert_eq_m128i(r, e);
1406 }
1407
1408 #[simd_test(enable = "sse4.1")]
1409 unsafe fn test_mm_cvtepi8_epi16() {
1410 let a = _mm_set1_epi8(10);
1411 let r = _mm_cvtepi8_epi16(a);
1412 let e = _mm_set1_epi16(10);
1413 assert_eq_m128i(r, e);
1414 let a = _mm_set1_epi8(-10);
1415 let r = _mm_cvtepi8_epi16(a);
1416 let e = _mm_set1_epi16(-10);
1417 assert_eq_m128i(r, e);
1418 }
1419
1420 #[simd_test(enable = "sse4.1")]
1421 unsafe fn test_mm_cvtepi8_epi32() {
1422 let a = _mm_set1_epi8(10);
1423 let r = _mm_cvtepi8_epi32(a);
1424 let e = _mm_set1_epi32(10);
1425 assert_eq_m128i(r, e);
1426 let a = _mm_set1_epi8(-10);
1427 let r = _mm_cvtepi8_epi32(a);
1428 let e = _mm_set1_epi32(-10);
1429 assert_eq_m128i(r, e);
1430 }
1431
1432 #[simd_test(enable = "sse4.1")]
1433 unsafe fn test_mm_cvtepi8_epi64() {
1434 let a = _mm_set1_epi8(10);
1435 let r = _mm_cvtepi8_epi64(a);
1436 let e = _mm_set1_epi64x(10);
1437 assert_eq_m128i(r, e);
1438 let a = _mm_set1_epi8(-10);
1439 let r = _mm_cvtepi8_epi64(a);
1440 let e = _mm_set1_epi64x(-10);
1441 assert_eq_m128i(r, e);
1442 }
1443
1444 #[simd_test(enable = "sse4.1")]
1445 unsafe fn test_mm_cvtepi16_epi32() {
1446 let a = _mm_set1_epi16(10);
1447 let r = _mm_cvtepi16_epi32(a);
1448 let e = _mm_set1_epi32(10);
1449 assert_eq_m128i(r, e);
1450 let a = _mm_set1_epi16(-10);
1451 let r = _mm_cvtepi16_epi32(a);
1452 let e = _mm_set1_epi32(-10);
1453 assert_eq_m128i(r, e);
1454 }
1455
1456 #[simd_test(enable = "sse4.1")]
1457 unsafe fn test_mm_cvtepi16_epi64() {
1458 let a = _mm_set1_epi16(10);
1459 let r = _mm_cvtepi16_epi64(a);
1460 let e = _mm_set1_epi64x(10);
1461 assert_eq_m128i(r, e);
1462 let a = _mm_set1_epi16(-10);
1463 let r = _mm_cvtepi16_epi64(a);
1464 let e = _mm_set1_epi64x(-10);
1465 assert_eq_m128i(r, e);
1466 }
1467
1468 #[simd_test(enable = "sse4.1")]
1469 unsafe fn test_mm_cvtepi32_epi64() {
1470 let a = _mm_set1_epi32(10);
1471 let r = _mm_cvtepi32_epi64(a);
1472 let e = _mm_set1_epi64x(10);
1473 assert_eq_m128i(r, e);
1474 let a = _mm_set1_epi32(-10);
1475 let r = _mm_cvtepi32_epi64(a);
1476 let e = _mm_set1_epi64x(-10);
1477 assert_eq_m128i(r, e);
1478 }
1479
1480 #[simd_test(enable = "sse4.1")]
1481 unsafe fn test_mm_cvtepu8_epi16() {
1482 let a = _mm_set1_epi8(10);
1483 let r = _mm_cvtepu8_epi16(a);
1484 let e = _mm_set1_epi16(10);
1485 assert_eq_m128i(r, e);
1486 }
1487
1488 #[simd_test(enable = "sse4.1")]
1489 unsafe fn test_mm_cvtepu8_epi32() {
1490 let a = _mm_set1_epi8(10);
1491 let r = _mm_cvtepu8_epi32(a);
1492 let e = _mm_set1_epi32(10);
1493 assert_eq_m128i(r, e);
1494 }
1495
1496 #[simd_test(enable = "sse4.1")]
1497 unsafe fn test_mm_cvtepu8_epi64() {
1498 let a = _mm_set1_epi8(10);
1499 let r = _mm_cvtepu8_epi64(a);
1500 let e = _mm_set1_epi64x(10);
1501 assert_eq_m128i(r, e);
1502 }
1503
1504 #[simd_test(enable = "sse4.1")]
1505 unsafe fn test_mm_cvtepu16_epi32() {
1506 let a = _mm_set1_epi16(10);
1507 let r = _mm_cvtepu16_epi32(a);
1508 let e = _mm_set1_epi32(10);
1509 assert_eq_m128i(r, e);
1510 }
1511
1512 #[simd_test(enable = "sse4.1")]
1513 unsafe fn test_mm_cvtepu16_epi64() {
1514 let a = _mm_set1_epi16(10);
1515 let r = _mm_cvtepu16_epi64(a);
1516 let e = _mm_set1_epi64x(10);
1517 assert_eq_m128i(r, e);
1518 }
1519
1520 #[simd_test(enable = "sse4.1")]
1521 unsafe fn test_mm_cvtepu32_epi64() {
1522 let a = _mm_set1_epi32(10);
1523 let r = _mm_cvtepu32_epi64(a);
1524 let e = _mm_set1_epi64x(10);
1525 assert_eq_m128i(r, e);
1526 }
1527
1528 #[simd_test(enable = "sse4.1")]
1529 unsafe fn test_mm_dp_pd() {
1530 let a = _mm_setr_pd(2.0, 3.0);
1531 let b = _mm_setr_pd(1.0, 4.0);
1532 let e = _mm_setr_pd(14.0, 0.0);
1533 assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
1534 }
1535
1536 #[simd_test(enable = "sse4.1")]
1537 unsafe fn test_mm_dp_ps() {
1538 let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
1539 let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
1540 let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
1541 assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
1542 }
1543
1544 #[simd_test(enable = "sse4.1")]
1545 unsafe fn test_mm_floor_pd() {
1546 let a = _mm_setr_pd(2.5, 4.5);
1547 let r = _mm_floor_pd(a);
1548 let e = _mm_setr_pd(2.0, 4.0);
1549 assert_eq_m128d(r, e);
1550 }
1551
1552 #[simd_test(enable = "sse4.1")]
1553 unsafe fn test_mm_floor_ps() {
1554 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1555 let r = _mm_floor_ps(a);
1556 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1557 assert_eq_m128(r, e);
1558 }
1559
1560 #[simd_test(enable = "sse4.1")]
1561 unsafe fn test_mm_floor_sd() {
1562 let a = _mm_setr_pd(2.5, 4.5);
1563 let b = _mm_setr_pd(-1.5, -3.5);
1564 let r = _mm_floor_sd(a, b);
1565 let e = _mm_setr_pd(-2.0, 4.5);
1566 assert_eq_m128d(r, e);
1567 }
1568
1569 #[simd_test(enable = "sse4.1")]
1570 unsafe fn test_mm_floor_ss() {
1571 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1572 let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
1573 let r = _mm_floor_ss(a, b);
1574 let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
1575 assert_eq_m128(r, e);
1576 }
1577
1578 #[simd_test(enable = "sse4.1")]
1579 unsafe fn test_mm_ceil_pd() {
1580 let a = _mm_setr_pd(1.5, 3.5);
1581 let r = _mm_ceil_pd(a);
1582 let e = _mm_setr_pd(2.0, 4.0);
1583 assert_eq_m128d(r, e);
1584 }
1585
1586 #[simd_test(enable = "sse4.1")]
1587 unsafe fn test_mm_ceil_ps() {
1588 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1589 let r = _mm_ceil_ps(a);
1590 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1591 assert_eq_m128(r, e);
1592 }
1593
1594 #[simd_test(enable = "sse4.1")]
1595 unsafe fn test_mm_ceil_sd() {
1596 let a = _mm_setr_pd(1.5, 3.5);
1597 let b = _mm_setr_pd(-2.5, -4.5);
1598 let r = _mm_ceil_sd(a, b);
1599 let e = _mm_setr_pd(-2.0, 3.5);
1600 assert_eq_m128d(r, e);
1601 }
1602
1603 #[simd_test(enable = "sse4.1")]
1604 unsafe fn test_mm_ceil_ss() {
1605 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1606 let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
1607 let r = _mm_ceil_ss(a, b);
1608 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1609 assert_eq_m128(r, e);
1610 }
1611
1612 #[simd_test(enable = "sse4.1")]
1613 unsafe fn test_mm_round_pd() {
1614 let a = _mm_setr_pd(1.25, 3.75);
1615 let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
1616 let e = _mm_setr_pd(1.0, 4.0);
1617 assert_eq_m128d(r, e);
1618 }
1619
1620 #[simd_test(enable = "sse4.1")]
1621 unsafe fn test_mm_round_ps() {
1622 let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
1623 let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
1624 let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
1625 assert_eq_m128(r, e);
1626 }
1627
1628 #[simd_test(enable = "sse4.1")]
1629 unsafe fn test_mm_round_sd() {
1630 let a = _mm_setr_pd(1.5, 3.5);
1631 let b = _mm_setr_pd(-2.5, -4.5);
1632 let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1633 let e = _mm_setr_pd(-2.0, 3.5);
1634 assert_eq_m128d(r, e);
1635
1636 let a = _mm_setr_pd(1.5, 3.5);
1637 let b = _mm_setr_pd(-2.5, -4.5);
1638 let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
1639 let e = _mm_setr_pd(-3.0, 3.5);
1640 assert_eq_m128d(r, e);
1641
1642 let a = _mm_setr_pd(1.5, 3.5);
1643 let b = _mm_setr_pd(-2.5, -4.5);
1644 let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
1645 let e = _mm_setr_pd(-2.0, 3.5);
1646 assert_eq_m128d(r, e);
1647
1648 let a = _mm_setr_pd(1.5, 3.5);
1649 let b = _mm_setr_pd(-2.5, -4.5);
1650 let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
1651 let e = _mm_setr_pd(-2.0, 3.5);
1652 assert_eq_m128d(r, e);
1653 }
1654
1655 #[simd_test(enable = "sse4.1")]
1656 unsafe fn test_mm_round_ss() {
1657 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1658 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1659 let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1660 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1661 assert_eq_m128(r, e);
1662
1663 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1664 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1665 let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
1666 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1667 assert_eq_m128(r, e);
1668
1669 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1670 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1671 let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
1672 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1673 assert_eq_m128(r, e);
1674
1675 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1676 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1677 let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
1678 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1679 assert_eq_m128(r, e);
1680 }
1681
1682 #[simd_test(enable = "sse4.1")]
1683 unsafe fn test_mm_minpos_epu16_1() {
1684 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
1685 let r = _mm_minpos_epu16(a);
1686 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1687 assert_eq_m128i(r, e);
1688 }
1689
1690 #[simd_test(enable = "sse4.1")]
1691 unsafe fn test_mm_minpos_epu16_2() {
1692 let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
1693 let r = _mm_minpos_epu16(a);
1694 let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
1695 assert_eq_m128i(r, e);
1696 }
1697
1698 #[simd_test(enable = "sse4.1")]
1699 unsafe fn test_mm_minpos_epu16_3() {
1700 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
1702 let r = _mm_minpos_epu16(a);
1703 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1704 assert_eq_m128i(r, e);
1705 }
1706
1707 #[simd_test(enable = "sse4.1")]
1708 unsafe fn test_mm_mul_epi32() {
1709 {
1710 let a = _mm_setr_epi32(1, 1, 1, 1);
1711 let b = _mm_setr_epi32(1, 2, 3, 4);
1712 let r = _mm_mul_epi32(a, b);
1713 let e = _mm_setr_epi64x(1, 3);
1714 assert_eq_m128i(r, e);
1715 }
1716 {
1717 let a = _mm_setr_epi32(15, 2 , 1234567, 4 );
1718 let b = _mm_setr_epi32(
1719 -20, -256, 666666, 666666, );
1722 let r = _mm_mul_epi32(a, b);
1723 let e = _mm_setr_epi64x(-300, 823043843622);
1724 assert_eq_m128i(r, e);
1725 }
1726 }
1727
1728 #[simd_test(enable = "sse4.1")]
1729 unsafe fn test_mm_mullo_epi32() {
1730 {
1731 let a = _mm_setr_epi32(1, 1, 1, 1);
1732 let b = _mm_setr_epi32(1, 2, 3, 4);
1733 let r = _mm_mullo_epi32(a, b);
1734 let e = _mm_setr_epi32(1, 2, 3, 4);
1735 assert_eq_m128i(r, e);
1736 }
1737 {
1738 let a = _mm_setr_epi32(15, -2, 1234567, 99999);
1739 let b = _mm_setr_epi32(-20, -256, 666666, -99999);
1740 let r = _mm_mullo_epi32(a, b);
1741 let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
1745 assert_eq_m128i(r, e);
1746 }
1747 }
1748
1749 #[simd_test(enable = "sse4.1")]
1750 unsafe fn test_mm_minpos_epu16() {
1751 let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
1752 let r = _mm_minpos_epu16(a);
1753 let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
1754 assert_eq_m128i(r, e);
1755 }
1756
1757 #[simd_test(enable = "sse4.1")]
1758 unsafe fn test_mm_mpsadbw_epu8() {
1759 #[rustfmt::skip]
1760 let a = _mm_setr_epi8(
1761 0, 1, 2, 3, 4, 5, 6, 7,
1762 8, 9, 10, 11, 12, 13, 14, 15,
1763 );
1764
1765 let r = _mm_mpsadbw_epu8::<0b000>(a, a);
1766 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1767 assert_eq_m128i(r, e);
1768
1769 let r = _mm_mpsadbw_epu8::<0b001>(a, a);
1770 let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
1771 assert_eq_m128i(r, e);
1772
1773 let r = _mm_mpsadbw_epu8::<0b100>(a, a);
1774 let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
1775 assert_eq_m128i(r, e);
1776
1777 let r = _mm_mpsadbw_epu8::<0b101>(a, a);
1778 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1779 assert_eq_m128i(r, e);
1780
1781 let r = _mm_mpsadbw_epu8::<0b111>(a, a);
1782 let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
1783 assert_eq_m128i(r, e);
1784 }
1785
1786 #[simd_test(enable = "sse4.1")]
1787 unsafe fn test_mm_testz_si128() {
1788 let a = _mm_set1_epi8(1);
1789 let mask = _mm_set1_epi8(0);
1790 let r = _mm_testz_si128(a, mask);
1791 assert_eq!(r, 1);
1792 let a = _mm_set1_epi8(0b101);
1793 let mask = _mm_set1_epi8(0b110);
1794 let r = _mm_testz_si128(a, mask);
1795 assert_eq!(r, 0);
1796 let a = _mm_set1_epi8(0b011);
1797 let mask = _mm_set1_epi8(0b100);
1798 let r = _mm_testz_si128(a, mask);
1799 assert_eq!(r, 1);
1800 }
1801
1802 #[simd_test(enable = "sse4.1")]
1803 unsafe fn test_mm_testc_si128() {
1804 let a = _mm_set1_epi8(-1);
1805 let mask = _mm_set1_epi8(0);
1806 let r = _mm_testc_si128(a, mask);
1807 assert_eq!(r, 1);
1808 let a = _mm_set1_epi8(0b101);
1809 let mask = _mm_set1_epi8(0b110);
1810 let r = _mm_testc_si128(a, mask);
1811 assert_eq!(r, 0);
1812 let a = _mm_set1_epi8(0b101);
1813 let mask = _mm_set1_epi8(0b100);
1814 let r = _mm_testc_si128(a, mask);
1815 assert_eq!(r, 1);
1816 }
1817
1818 #[simd_test(enable = "sse4.1")]
1819 unsafe fn test_mm_testnzc_si128() {
1820 let a = _mm_set1_epi8(0);
1821 let mask = _mm_set1_epi8(1);
1822 let r = _mm_testnzc_si128(a, mask);
1823 assert_eq!(r, 0);
1824 let a = _mm_set1_epi8(-1);
1825 let mask = _mm_set1_epi8(0);
1826 let r = _mm_testnzc_si128(a, mask);
1827 assert_eq!(r, 0);
1828 let a = _mm_set1_epi8(0b101);
1829 let mask = _mm_set1_epi8(0b110);
1830 let r = _mm_testnzc_si128(a, mask);
1831 assert_eq!(r, 1);
1832 let a = _mm_set1_epi8(0b101);
1833 let mask = _mm_set1_epi8(0b101);
1834 let r = _mm_testnzc_si128(a, mask);
1835 assert_eq!(r, 0);
1836 }
1837
1838 #[simd_test(enable = "sse4.1")]
1839 unsafe fn test_mm_test_all_zeros() {
1840 let a = _mm_set1_epi8(1);
1841 let mask = _mm_set1_epi8(0);
1842 let r = _mm_test_all_zeros(a, mask);
1843 assert_eq!(r, 1);
1844 let a = _mm_set1_epi8(0b101);
1845 let mask = _mm_set1_epi8(0b110);
1846 let r = _mm_test_all_zeros(a, mask);
1847 assert_eq!(r, 0);
1848 let a = _mm_set1_epi8(0b011);
1849 let mask = _mm_set1_epi8(0b100);
1850 let r = _mm_test_all_zeros(a, mask);
1851 assert_eq!(r, 1);
1852 }
1853
1854 #[simd_test(enable = "sse4.1")]
1855 unsafe fn test_mm_test_all_ones() {
1856 let a = _mm_set1_epi8(-1);
1857 let r = _mm_test_all_ones(a);
1858 assert_eq!(r, 1);
1859 let a = _mm_set1_epi8(0b101);
1860 let r = _mm_test_all_ones(a);
1861 assert_eq!(r, 0);
1862 }
1863
1864 #[simd_test(enable = "sse4.1")]
1865 unsafe fn test_mm_test_mix_ones_zeros() {
1866 let a = _mm_set1_epi8(0);
1867 let mask = _mm_set1_epi8(1);
1868 let r = _mm_test_mix_ones_zeros(a, mask);
1869 assert_eq!(r, 0);
1870 let a = _mm_set1_epi8(-1);
1871 let mask = _mm_set1_epi8(0);
1872 let r = _mm_test_mix_ones_zeros(a, mask);
1873 assert_eq!(r, 0);
1874 let a = _mm_set1_epi8(0b101);
1875 let mask = _mm_set1_epi8(0b110);
1876 let r = _mm_test_mix_ones_zeros(a, mask);
1877 assert_eq!(r, 1);
1878 let a = _mm_set1_epi8(0b101);
1879 let mask = _mm_set1_epi8(0b101);
1880 let r = _mm_test_mix_ones_zeros(a, mask);
1881 assert_eq!(r, 0);
1882 }
1883
1884 #[simd_test(enable = "sse4.1")]
1885 unsafe fn test_mm_stream_load_si128() {
1886 let a = _mm_set_epi64x(5, 6);
1887 let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _);
1888 assert_eq_m128i(a, r);
1889 }
1890}