1use crate::core_arch::x86::*;
2use crate::intrinsics::simd::simd_select_bitmask;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7#[inline]
15#[target_feature(enable = "avx512ifma")]
16#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17#[cfg_attr(test, assert_instr(vpmadd52huq))]
18pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
19 vpmadd52huq_512(a, b, c)
20}
21
22#[inline]
31#[target_feature(enable = "avx512ifma")]
32#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33#[cfg_attr(test, assert_instr(vpmadd52huq))]
34pub unsafe fn _mm512_mask_madd52hi_epu64(
35 a: __m512i,
36 k: __mmask8,
37 b: __m512i,
38 c: __m512i,
39) -> __m512i {
40 simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a)
41}
42
43#[inline]
52#[target_feature(enable = "avx512ifma")]
53#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
54#[cfg_attr(test, assert_instr(vpmadd52huq))]
55pub unsafe fn _mm512_maskz_madd52hi_epu64(
56 k: __mmask8,
57 a: __m512i,
58 b: __m512i,
59 c: __m512i,
60) -> __m512i {
61 simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512())
62}
63
64#[inline]
72#[target_feature(enable = "avx512ifma")]
73#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
74#[cfg_attr(test, assert_instr(vpmadd52luq))]
75pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
76 vpmadd52luq_512(a, b, c)
77}
78
79#[inline]
88#[target_feature(enable = "avx512ifma")]
89#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
90#[cfg_attr(test, assert_instr(vpmadd52luq))]
91pub unsafe fn _mm512_mask_madd52lo_epu64(
92 a: __m512i,
93 k: __mmask8,
94 b: __m512i,
95 c: __m512i,
96) -> __m512i {
97 simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a)
98}
99
100#[inline]
109#[target_feature(enable = "avx512ifma")]
110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
111#[cfg_attr(test, assert_instr(vpmadd52luq))]
112pub unsafe fn _mm512_maskz_madd52lo_epu64(
113 k: __mmask8,
114 a: __m512i,
115 b: __m512i,
116 c: __m512i,
117) -> __m512i {
118 simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512())
119}
120
121#[inline]
129#[target_feature(enable = "avxifma")]
130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
131#[cfg_attr(
132 all(test, any(target_os = "linux", target_env = "msvc")),
133 assert_instr(vpmadd52huq)
134)]
135pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
136 vpmadd52huq_256(a, b, c)
137}
138
139#[inline]
147#[target_feature(enable = "avx512ifma,avx512vl")]
148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
149#[cfg_attr(test, assert_instr(vpmadd52huq))]
150pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
151 vpmadd52huq_256(a, b, c)
152}
153
154#[inline]
163#[target_feature(enable = "avx512ifma,avx512vl")]
164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
165#[cfg_attr(test, assert_instr(vpmadd52huq))]
166pub unsafe fn _mm256_mask_madd52hi_epu64(
167 a: __m256i,
168 k: __mmask8,
169 b: __m256i,
170 c: __m256i,
171) -> __m256i {
172 simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a)
173}
174
175#[inline]
184#[target_feature(enable = "avx512ifma,avx512vl")]
185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
186#[cfg_attr(test, assert_instr(vpmadd52huq))]
187pub unsafe fn _mm256_maskz_madd52hi_epu64(
188 k: __mmask8,
189 a: __m256i,
190 b: __m256i,
191 c: __m256i,
192) -> __m256i {
193 simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256())
194}
195
196#[inline]
204#[target_feature(enable = "avxifma")]
205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
206#[cfg_attr(
207 all(test, any(target_os = "linux", target_env = "msvc")),
208 assert_instr(vpmadd52luq)
209)]
210pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
211 vpmadd52luq_256(a, b, c)
212}
213
214#[inline]
222#[target_feature(enable = "avx512ifma,avx512vl")]
223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
224#[cfg_attr(test, assert_instr(vpmadd52luq))]
225pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
226 vpmadd52luq_256(a, b, c)
227}
228
229#[inline]
238#[target_feature(enable = "avx512ifma,avx512vl")]
239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
240#[cfg_attr(test, assert_instr(vpmadd52luq))]
241pub unsafe fn _mm256_mask_madd52lo_epu64(
242 a: __m256i,
243 k: __mmask8,
244 b: __m256i,
245 c: __m256i,
246) -> __m256i {
247 simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a)
248}
249
250#[inline]
259#[target_feature(enable = "avx512ifma,avx512vl")]
260#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
261#[cfg_attr(test, assert_instr(vpmadd52luq))]
262pub unsafe fn _mm256_maskz_madd52lo_epu64(
263 k: __mmask8,
264 a: __m256i,
265 b: __m256i,
266 c: __m256i,
267) -> __m256i {
268 simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256())
269}
270
271#[inline]
279#[target_feature(enable = "avxifma")]
280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
281#[cfg_attr(
282 all(test, any(target_os = "linux", target_env = "msvc")),
283 assert_instr(vpmadd52huq)
284)]
285pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
286 vpmadd52huq_128(a, b, c)
287}
288
289#[inline]
297#[target_feature(enable = "avx512ifma,avx512vl")]
298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
299#[cfg_attr(test, assert_instr(vpmadd52huq))]
300pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
301 vpmadd52huq_128(a, b, c)
302}
303
304#[inline]
313#[target_feature(enable = "avx512ifma,avx512vl")]
314#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
315#[cfg_attr(test, assert_instr(vpmadd52huq))]
316pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
317 simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a)
318}
319
320#[inline]
329#[target_feature(enable = "avx512ifma,avx512vl")]
330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
331#[cfg_attr(test, assert_instr(vpmadd52huq))]
332pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
333 simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128())
334}
335
336#[inline]
344#[target_feature(enable = "avxifma")]
345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
346#[cfg_attr(
347 all(test, any(target_os = "linux", target_env = "msvc")),
348 assert_instr(vpmadd52luq)
349)]
350pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
351 vpmadd52luq_128(a, b, c)
352}
353
354#[inline]
362#[target_feature(enable = "avx512ifma,avx512vl")]
363#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
364#[cfg_attr(test, assert_instr(vpmadd52luq))]
365pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
366 vpmadd52luq_128(a, b, c)
367}
368
369#[inline]
378#[target_feature(enable = "avx512ifma,avx512vl")]
379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
380#[cfg_attr(test, assert_instr(vpmadd52luq))]
381pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
382 simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a)
383}
384
385#[inline]
394#[target_feature(enable = "avx512ifma,avx512vl")]
395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
396#[cfg_attr(test, assert_instr(vpmadd52luq))]
397pub unsafe fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
398 simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128())
399}
400
401#[allow(improper_ctypes)]
402extern "C" {
403 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
404 fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
405 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
406 fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
407 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
408 fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
409 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
410 fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
411 #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
412 fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
413 #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
414 fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
415}
416
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// Lane mask shared by every masked test.
    const K: __mmask8 = 0b01101101;

    /// Value broadcast into every lane of the accumulator `a`.
    const ACC: i64 = 10 << 40;
    /// Value broadcast into every lane of the first multiplicand `b`.
    const MUL_B: i64 = (11 << 40) + 4;
    /// Value broadcast into every lane of the second multiplicand `c`.
    const MUL_C: i64 = (12 << 40) + 3;

    /// `ACC + ((MUL_B * MUL_C) >> 52)`, with the product taken at full width.
    const EXPECTED_HI: i64 = 11030549757952;
    /// `ACC + ((MUL_B * MUL_C) % (1 << 52))`, with the product taken at full width.
    const EXPECTED_LO: i64 = 100055558127628;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(ACC);
        let b = _mm512_set1_epi64(MUL_B);
        let c = _mm512_set1_epi64(MUL_C);

        let expected = _mm512_set1_epi64(EXPECTED_HI);
        let actual = _mm512_madd52hi_epu64(a, b, c);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(ACC);
        let b = _mm512_set1_epi64(MUL_B);
        let c = _mm512_set1_epi64(MUL_C);

        // Unselected lanes must keep the accumulator value.
        let computed = _mm512_set1_epi64(EXPECTED_HI);
        let expected = _mm512_mask_blend_epi64(K, a, computed);
        let actual = _mm512_mask_madd52hi_epu64(a, K, b, c);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(ACC);
        let b = _mm512_set1_epi64(MUL_B);
        let c = _mm512_set1_epi64(MUL_C);

        // Unselected lanes must be zero.
        let computed = _mm512_set1_epi64(EXPECTED_HI);
        let expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), computed);
        let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(ACC);
        let b = _mm512_set1_epi64(MUL_B);
        let c = _mm512_set1_epi64(MUL_C);

        let expected = _mm512_set1_epi64(EXPECTED_LO);
        let actual = _mm512_madd52lo_epu64(a, b, c);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(ACC);
        let b = _mm512_set1_epi64(MUL_B);
        let c = _mm512_set1_epi64(MUL_C);

        // Unselected lanes must keep the accumulator value.
        let computed = _mm512_set1_epi64(EXPECTED_LO);
        let expected = _mm512_mask_blend_epi64(K, a, computed);
        let actual = _mm512_mask_madd52lo_epu64(a, K, b, c);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(ACC);
        let b = _mm512_set1_epi64(MUL_B);
        let c = _mm512_set1_epi64(MUL_C);

        // Unselected lanes must be zero.
        let computed = _mm512_set1_epi64(EXPECTED_LO);
        let expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), computed);
        let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        let expected = _mm256_set1_epi64x(EXPECTED_HI);
        let actual = _mm256_madd52hi_avx_epu64(a, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        let expected = _mm256_set1_epi64x(EXPECTED_HI);
        let actual = _mm256_madd52hi_epu64(a, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        // Unselected lanes must keep the accumulator value.
        let computed = _mm256_set1_epi64x(EXPECTED_HI);
        let expected = _mm256_mask_blend_epi64(K, a, computed);
        let actual = _mm256_mask_madd52hi_epu64(a, K, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        // Unselected lanes must be zero.
        let computed = _mm256_set1_epi64x(EXPECTED_HI);
        let expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), computed);
        let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        let expected = _mm256_set1_epi64x(EXPECTED_LO);
        let actual = _mm256_madd52lo_avx_epu64(a, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        let expected = _mm256_set1_epi64x(EXPECTED_LO);
        let actual = _mm256_madd52lo_epu64(a, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        // Unselected lanes must keep the accumulator value.
        let computed = _mm256_set1_epi64x(EXPECTED_LO);
        let expected = _mm256_mask_blend_epi64(K, a, computed);
        let actual = _mm256_mask_madd52lo_epu64(a, K, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(ACC);
        let b = _mm256_set1_epi64x(MUL_B);
        let c = _mm256_set1_epi64x(MUL_C);

        // Unselected lanes must be zero.
        let computed = _mm256_set1_epi64x(EXPECTED_LO);
        let expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), computed);
        let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        let expected = _mm_set1_epi64x(EXPECTED_HI);
        let actual = _mm_madd52hi_avx_epu64(a, b, c);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        let expected = _mm_set1_epi64x(EXPECTED_HI);
        let actual = _mm_madd52hi_epu64(a, b, c);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        // Unselected lanes must keep the accumulator value.
        let computed = _mm_set1_epi64x(EXPECTED_HI);
        let expected = _mm_mask_blend_epi64(K, a, computed);
        let actual = _mm_mask_madd52hi_epu64(a, K, b, c);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        // Unselected lanes must be zero.
        let computed = _mm_set1_epi64x(EXPECTED_HI);
        let expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), computed);
        let actual = _mm_maskz_madd52hi_epu64(K, a, b, c);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        let expected = _mm_set1_epi64x(EXPECTED_LO);
        let actual = _mm_madd52lo_avx_epu64(a, b, c);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        let expected = _mm_set1_epi64x(EXPECTED_LO);
        let actual = _mm_madd52lo_epu64(a, b, c);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        // Unselected lanes must keep the accumulator value.
        let computed = _mm_set1_epi64x(EXPECTED_LO);
        let expected = _mm_mask_blend_epi64(K, a, computed);
        let actual = _mm_mask_madd52lo_epu64(a, K, b, c);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(ACC);
        let b = _mm_set1_epi64x(MUL_B);
        let c = _mm_set1_epi64x(MUL_C);

        // Unselected lanes must be zero.
        let computed = _mm_set1_epi64x(EXPECTED_LO);
        let expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), computed);
        let actual = _mm_maskz_madd52lo_epu64(K, a, b, c);

        assert_eq_m128i(expected, actual);
    }
}