core/stdarch/crates/core_arch/src/x86/avx512ifma.rs

use crate::core_arch::x86::*;
use crate::intrinsics::simd::simd_select_bitmask;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    vpmadd52huq_512(a, b, c)
}
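
// A scalar sketch of the per-lane arithmetic described above, added for
// exposition only; the helper name `madd52hi_u64_model` and its placement
// here are assumptions, not part of the crate's API.
#[cfg(test)]
#[allow(dead_code)]
fn madd52hi_u64_model(a: u64, b: u64, c: u64) -> u64 {
    // Keep only the low 52 bits of each multiplicand, form the 104-bit
    // product in 128-bit arithmetic, then add its high 52 bits to `a`.
    const LOW_52: u64 = (1 << 52) - 1;
    let product = u128::from(b & LOW_52) * u128::from(c & LOW_52);
    a.wrapping_add((product >> 52) as u64)
}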

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm512_mask_madd52hi_epu64(
    a: __m512i,
    k: __mmask8,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a)
}
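
// A scalar sketch of the writemask behaviour used above: lane `i` takes the
// computed value when bit `i` of `k` is set and otherwise keeps the value
// from `a`. The helper name is hypothetical and serves only to illustrate
// what `simd_select_bitmask` does here.
#[cfg(test)]
#[allow(dead_code)]
fn mask_select_lane_model(k: u8, lane: usize, computed: u64, a: u64) -> u64 {
    if (k >> lane) & 1 == 1 { computed } else { a }
}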

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm512_maskz_madd52hi_epu64(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512())
}
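
// The zero-masking variant differs from the merge-masking one only in its
// fallback: unselected lanes become zero instead of keeping `a`. Another
// illustration-only helper; the name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_select_lane_model(k: u8, lane: usize, computed: u64) -> u64 {
    if (k >> lane) & 1 == 1 { computed } else { 0 }
}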

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    vpmadd52luq_512(a, b, c)
}
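
// The low-half counterpart of the scalar sketch above: it adds the low 52
// bits of the 104-bit product to `a`. Again, the helper name is an
// illustration only, not part of the crate's API.
#[cfg(test)]
#[allow(dead_code)]
fn madd52lo_u64_model(a: u64, b: u64, c: u64) -> u64 {
    const LOW_52: u64 = (1 << 52) - 1;
    let product = u128::from(b & LOW_52) * u128::from(c & LOW_52);
    a.wrapping_add((product as u64) & LOW_52)
}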

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm512_mask_madd52lo_epu64(
    a: __m512i,
    k: __mmask8,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm512_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm512_maskz_madd52lo_epu64(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512())
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52hi_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52huq)
)]
pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    vpmadd52huq_256(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    vpmadd52huq_256(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm256_mask_madd52hi_epu64(
    a: __m256i,
    k: __mmask8,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm256_maskz_madd52hi_epu64(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256())
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd52lo_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52luq)
)]
pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    vpmadd52luq_256(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    vpmadd52luq_256(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm256_mask_madd52lo_epu64(
    a: __m256i,
    k: __mmask8,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm256_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm256_maskz_madd52lo_epu64(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256())
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52hi_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52huq)
)]
pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    vpmadd52huq_128(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    vpmadd52huq_128(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52hi_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128())
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_madd52lo_avx_epu64)
#[inline]
#[target_feature(enable = "avxifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(vpmadd52luq)
)]
pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    vpmadd52luq_128(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    vpmadd52luq_128(a, b, c)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are copied
/// from `a` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_mask_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
    simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a)
}

/// Multiply packed unsigned 52-bit integers in each 64-bit element of
/// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit
/// unsigned integer from the intermediate result with the
/// corresponding unsigned 64-bit integer in `a`, and store the
/// results in `dst` using writemask `k` (elements are zeroed
/// out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512IFMA52&text=_mm_maskz_madd52lo_epu64)
#[inline]
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128())
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"]
    fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"]
    fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"]
    fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"]
    fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i;
    #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"]
    fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
    #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"]
    fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i;
}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    const K: __mmask8 = 0b01101101;

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm512_set1_epi64(11030549757952);

        assert_eq_m512i(expected, actual);
    }
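
    // A cross-check, added here for exposition, of how the expected constant
    // above is obtained: the same arithmetic carried out in plain 128-bit
    // integer math. This test is an addition, not part of the original suite.
    #[test]
    fn high52_expected_constant_model() {
        let b: u128 = (11u128 << 40) + 4;
        let c: u128 = (12u128 << 40) + 3;
        assert_eq!((10u128 << 40) + ((b * c) >> 52), 11030549757952);
    }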

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52hi_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm512_set1_epi64(11030549757952);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm512_set1_epi64(100055558127628);

        assert_eq_m512i(expected, actual);
    }
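
    // The low-half analogue of the cross-check above, again an added
    // illustration: the low 52 bits of the product plus `a` give the
    // expected constant used in the lo tests.
    #[test]
    fn low52_expected_constant_model() {
        let b: u128 = (11u128 << 40) + 4;
        let c: u128 = (12u128 << 40) + 3;
        assert_eq!((10u128 << 40) + ((b * c) % (1u128 << 52)), 100055558127628);
    }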

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_mask_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, a, expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma")]
    unsafe fn test_mm512_maskz_madd52lo_epu64() {
        let a = _mm512_set1_epi64(10 << 40);
        let b = _mm512_set1_epi64((11 << 40) + 4);
        let c = _mm512_set1_epi64((12 << 40) + 3);

        let actual = _mm512_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm512_set1_epi64(100055558127628);
        expected = _mm512_mask_blend_epi64(K, _mm512_setzero_si512(), expected);

        assert_eq_m512i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52hi_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm256_set1_epi64x(11030549757952);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52hi_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm256_set1_epi64x(11030549757952);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm256_madd52lo_avx_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm256_set1_epi64x(100055558127628);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_mask_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, a, expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm256_maskz_madd52lo_epu64() {
        let a = _mm256_set1_epi64x(10 << 40);
        let b = _mm256_set1_epi64x((11 << 40) + 4);
        let c = _mm256_set1_epi64x((12 << 40) + 3);

        let actual = _mm256_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm256_set1_epi64x(100055558127628);
        expected = _mm256_mask_blend_epi64(K, _mm256_setzero_si256(), expected);

        assert_eq_m256i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52hi_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52hi_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let expected = _mm_set1_epi64x(11030549757952);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52hi_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52hi_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52hi_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52)
        let mut expected = _mm_set1_epi64x(11030549757952);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avxifma")]
    unsafe fn test_mm_madd52lo_avx_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_avx_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_madd52lo_epu64(a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let expected = _mm_set1_epi64x(100055558127628);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_mask_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_mask_madd52lo_epu64(a, K, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, a, expected);

        assert_eq_m128i(expected, actual);
    }

    #[simd_test(enable = "avx512ifma,avx512vl")]
    unsafe fn test_mm_maskz_madd52lo_epu64() {
        let a = _mm_set1_epi64x(10 << 40);
        let b = _mm_set1_epi64x((11 << 40) + 4);
        let c = _mm_set1_epi64x((12 << 40) + 3);

        let actual = _mm_maskz_madd52lo_epu64(K, a, b, c);

        // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52))
        let mut expected = _mm_set1_epi64x(100055558127628);
        expected = _mm_mask_blend_epi64(K, _mm_setzero_si128(), expected);

        assert_eq_m128i(expected, actual);
    }
}