// core/stdarch/crates/core_arch/src/x86/avx512bw.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    ptr,
};

use core::hint::unreachable_unchecked;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi16&expand=30)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    let a = a.as_i16x32();
    let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
    transmute(simd_select(cmp, a, simd_neg(a)))
}
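
// Illustrative sketch, not part of the upstream file: a minimal check of the
// unsigned-result semantics documented above, written in the style of the
// crate's own test module (`simd_test` and `assert_eq_m512i` are assumed to
// be available here as they are for the real tests in this file).
#[cfg(test)]
mod abs_epi16_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_abs_epi16() {
        let a = _mm512_set1_epi16(-5);
        assert_eq_m512i(_mm512_abs_epi16(a), _mm512_set1_epi16(5));
        // i16::MIN has no positive counterpart: the result keeps the bit
        // pattern 0x8000, which reads as -32768 when reinterpreted as signed.
        let min = _mm512_set1_epi16(i16::MIN);
        assert_eq_m512i(_mm512_abs_epi16(min), min);
    }
}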

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi16&expand=31)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
}
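
// Illustrative sketch, not part of the upstream file: bit i of the mask
// selects lane i of the result. This contrasts the write- and zero-masked
// forms, using the crate's usual test conventions (assumed available here).
#[cfg(test)]
mod mask_abs_epi16_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mask_vs_maskz() {
        let src = _mm512_set1_epi16(7);
        let a = _mm512_set1_epi16(-3);
        // All mask bits clear: every lane is copied from `src`...
        assert_eq_m512i(_mm512_mask_abs_epi16(src, 0, a), src);
        // ...all bits set: every lane is the absolute value.
        assert_eq_m512i(_mm512_mask_abs_epi16(src, !0, a), _mm512_set1_epi16(3));
        // The zeroing form writes 0 to unselected lanes instead of `src`.
        assert_eq_m512i(_mm512_maskz_abs_epi16(0, a), _mm512_setzero_si512());
    }
}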

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi16&expand=32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    let a = a.as_i8x64();
    let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
    transmute(simd_select(cmp, a, simd_neg(a)))
}
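
// Illustrative sketch, not part of the upstream file: the 8-bit variant works
// on 64 lanes, so its masked forms take a `__mmask64`. As in the 16-bit case,
// `i8::MIN` maps to itself (bit pattern 0x80). Test helpers assumed as in the
// file's real test module.
#[cfg(test)]
mod abs_epi8_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_abs_epi8() {
        let a = _mm512_set1_epi8(-100);
        assert_eq_m512i(_mm512_abs_epi8(a), _mm512_set1_epi8(100));
        let min = _mm512_set1_epi8(i8::MIN);
        assert_eq_m512i(_mm512_abs_epi8(min), min);
    }
}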

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
}

/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi16&expand=91)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i16x32(), b.as_i16x32()))
}
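
// Illustrative sketch, not part of the upstream file: unlike the `adds`
// family further below, plain `vpaddw` wraps on overflow. Uses the crate's
// test conventions (assumed available as in the file's real test module).
#[cfg(test)]
mod add_epi16_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_add_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        // 0x7FFF + 1 wraps to 0x8000, i.e. i16::MIN.
        assert_eq_m512i(_mm512_add_epi16(a, b), _mm512_set1_epi16(i16::MIN));
    }
}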

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, add, src.as_i16x32()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, add, i16x32::ZERO))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, add, src.as_i16x16()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, add, i16x16::ZERO))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, add, src.as_i16x8()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, add, i16x8::ZERO))
}

/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i8x64(), b.as_i8x64()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, add, src.as_i8x64()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, add, i8x64::ZERO))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, add, src.as_i8x32()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, add, i8x32::ZERO))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, add, src.as_i8x16()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, add, i8x16::ZERO))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu16&expand=197)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32()))
}
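
// Illustrative sketch, not part of the upstream file: unsigned saturation
// clamps at u16::MAX instead of wrapping. Note that lanes are reinterpreted
// as unsigned, so the all-ones pattern (-1 as i16) reads as 65535.
#[cfg(test)]
mod adds_epu16_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_adds_epu16() {
        let a = _mm512_set1_epi16(-1); // 0xFFFF = u16::MAX in each lane
        let b = _mm512_set1_epi16(1);
        // u16::MAX + 1 saturates to u16::MAX rather than wrapping to 0.
        assert_eq_m512i(_mm512_adds_epu16(a, b), a);
    }
}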

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_mask_adds_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let add = _mm512_adds_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, add, src.as_u16x32()))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_adds_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, add, u16x32::ZERO))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_mask_adds_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let add = _mm256_adds_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, add, src.as_u16x16()))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_adds_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, add, u16x16::ZERO))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, add, src.as_u16x8()))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, add, u16x8::ZERO))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64()))
}
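
// Illustrative sketch, not part of the upstream file: the same unsigned
// saturation as `_mm512_adds_epu16`, at byte granularity (clamping at 255).
#[cfg(test)]
mod adds_epu8_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_adds_epu8() {
        let a = _mm512_set1_epi8(200u8 as i8);
        let b = _mm512_set1_epi8(100u8 as i8);
        // 200 + 100 = 300 exceeds u8::MAX and saturates to 255.
        assert_eq_m512i(_mm512_adds_epu8(a, b), _mm512_set1_epi8(u8::MAX as i8));
    }
}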

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_adds_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, add, src.as_u8x64()))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_adds_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, add, u8x64::ZERO))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_adds_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, add, src.as_u8x32()))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_adds_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, add, u8x32::ZERO))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, add, src.as_u8x16()))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, add, u8x16::ZERO))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi16&expand=179)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32()))
}
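
// Illustrative sketch, not part of the upstream file: signed saturation
// clamps at both ends of the i16 range instead of wrapping.
#[cfg(test)]
mod adds_epi16_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_adds_epi16() {
        let max = _mm512_set1_epi16(i16::MAX);
        let min = _mm512_set1_epi16(i16::MIN);
        // i16::MAX + 1 stays i16::MAX; i16::MIN + (-1) stays i16::MIN.
        assert_eq_m512i(_mm512_adds_epi16(max, _mm512_set1_epi16(1)), max);
        assert_eq_m512i(_mm512_adds_epi16(min, _mm512_set1_epi16(-1)), min);
    }
}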

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_mask_adds_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let add = _mm512_adds_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, add, src.as_i16x32()))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_adds_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, add, i16x32::ZERO))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_mask_adds_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let add = _mm256_adds_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, add, src.as_i16x16()))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_adds_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, add, i16x16::ZERO))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, add, src.as_i16x8()))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, add, i16x8::ZERO))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_adds_epi8&expand=188)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64()))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_adds_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, add, src.as_i8x64()))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_adds_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, add, i8x64::ZERO))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_adds_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, add, src.as_i8x32()))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_adds_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, add, i8x32::ZERO))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, add, src.as_i8x16()))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_adds_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, add, i8x16::ZERO))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi16&expand=5685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i16x32(), b.as_i16x32()))
}
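
// Illustrative sketch, not part of the upstream file: like `vpaddw`, plain
// `vpsubw` wraps on overflow; the saturating forms follow further below.
#[cfg(test)]
mod sub_epi16_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(i16::MIN);
        let b = _mm512_set1_epi16(1);
        // i16::MIN - 1 wraps around to i16::MAX.
        assert_eq_m512i(_mm512_sub_epi16(a, b), _mm512_set1_epi16(i16::MAX));
    }
}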

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i8x64(), b.as_i8x64()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu16&expand=5793)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32()))
}
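
// Illustrative sketch, not part of the upstream file: unsigned saturating
// subtraction cannot go below zero, so it floors at 0 instead of wrapping.
#[cfg(test)]
mod subs_epu16_example {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn example_mm512_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        // 1 - 2 floors at 0 (unsigned), rather than wrapping to 0xFFFF.
        assert_eq_m512i(_mm512_subs_epu16(a, b), _mm512_setzero_si512());
    }
}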
876
877/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
878///
879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu16&expand=5791)
880#[inline]
881#[target_feature(enable = "avx512bw")]
882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
883#[cfg_attr(test, assert_instr(vpsubusw))]
884pub unsafe fn _mm512_mask_subs_epu16(
885    src: __m512i,
886    k: __mmask32,
887    a: __m512i,
888    b: __m512i,
889) -> __m512i {
890    let sub = _mm512_subs_epu16(a, b).as_u16x32();
891    transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
892}
893
894/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
895///
896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu16&expand=5792)
897#[inline]
898#[target_feature(enable = "avx512bw")]
899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
900#[cfg_attr(test, assert_instr(vpsubusw))]
901pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
902    let sub = _mm512_subs_epu16(a, b).as_u16x32();
903    transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
904}
905
906/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
907///
908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu16&expand=5788)
909#[inline]
910#[target_feature(enable = "avx512bw,avx512vl")]
911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
912#[cfg_attr(test, assert_instr(vpsubusw))]
913pub unsafe fn _mm256_mask_subs_epu16(
914    src: __m256i,
915    k: __mmask16,
916    a: __m256i,
917    b: __m256i,
918) -> __m256i {
919    let sub = _mm256_subs_epu16(a, b).as_u16x16();
920    transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
921}
922
923/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
924///
925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu16&expand=5789)
926#[inline]
927#[target_feature(enable = "avx512bw,avx512vl")]
928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
929#[cfg_attr(test, assert_instr(vpsubusw))]
930pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
931    let sub = _mm256_subs_epu16(a, b).as_u16x16();
932    transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
933}
934
935/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
936///
937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu16&expand=5785)
938#[inline]
939#[target_feature(enable = "avx512bw,avx512vl")]
940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
941#[cfg_attr(test, assert_instr(vpsubusw))]
942pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
943    let sub = _mm_subs_epu16(a, b).as_u16x8();
944    transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
945}
946
947/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
948///
949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu16&expand=5786)
950#[inline]
951#[target_feature(enable = "avx512bw,avx512vl")]
952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
953#[cfg_attr(test, assert_instr(vpsubusw))]
954pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
955    let sub = _mm_subs_epu16(a, b).as_u16x8();
956    transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
957}
958
959/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
960///
961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epu8&expand=5802)
962#[inline]
963#[target_feature(enable = "avx512bw")]
964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
965#[cfg_attr(test, assert_instr(vpsubusb))]
966pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
967    transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64()))
968}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epu8&expand=5800)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_subs_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_subs_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_subs_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_subs_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_subs_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_subs_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32()))
}
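
// Illustrative sketch (not part of this crate): the scalar analogue of the
// signed saturating subtraction above. Results are clamped to the i16 range
// [-32768, 32767] rather than wrapping.
//
//     assert_eq!(i16::MIN.saturating_sub(1), i16::MIN); // clamps at -32768
//     assert_eq!(i16::MAX.saturating_sub(-1), i16::MAX); // clamps at 32767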

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_mask_subs_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let sub = _mm512_subs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_subs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm256_mask_subs_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let sub = _mm256_subs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi16&expand=5771)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_subs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi16&expand=5767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_subs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi16&expand=5768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_subs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64()))
}
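
// Illustrative sketch (not part of this crate): the scalar analogue of the
// signed 8-bit saturating subtraction above, clamping to [-128, 127].
//
//     assert_eq!(i8::MIN.saturating_sub(1), i8::MIN); // clamps at -128
//     assert_eq!(i8::MAX.saturating_sub(-1), i8::MAX); // clamps at 127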

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_subs_epi8&expand=5782)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_subs_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_subs_epi8&expand=5783)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_subs_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_subs_epi8&expand=5779)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_subs_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_subs_epi8&expand=5780)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_subs_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_subs_epi8&expand=5776)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_subs_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_subs_epi8&expand=5777)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_subs_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    let a = simd_cast::<_, u32x32>(a.as_u16x32());
    let b = simd_cast::<_, u32x32>(b.as_u16x32());
    let r = simd_shr(simd_mul(a, b), u32x32::splat(16));
    transmute(simd_cast::<u32x32, u16x32>(r))
}
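
// Illustrative sketch (not part of this crate): the scalar analogue of one
// lane of the unsigned high multiply above. The operands are widened to 32
// bits, multiplied, and the upper 16 bits of the product are kept.
//
//     let (a, b): (u16, u16) = (0x8000, 0x0004); // 32768 * 4 = 131072
//     let hi = ((a as u32 * b as u32) >> 16) as u16;
//     assert_eq!(hi, 0x0002); // 131072 >> 16 == 2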

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epu16&expand=3971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm512_mask_mulhi_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epu16&expand=3972)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epu16&expand=3968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm256_mask_mulhi_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epu16&expand=3969)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epu16&expand=3965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epu16&expand=3966)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    let a = simd_cast::<_, i32x32>(a.as_i16x32());
    let b = simd_cast::<_, i32x32>(b.as_i16x32());
    let r = simd_shr(simd_mul(a, b), i32x32::splat(16));
    transmute(simd_cast::<i32x32, i16x32>(r))
}
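
// Illustrative sketch (not part of this crate): the scalar analogue of one
// lane of the signed high multiply above. The arithmetic shift on the widened
// product is what preserves the sign in the stored high word.
//
//     let (a, b): (i16, i16) = (-1, 1);
//     let hi = ((a as i32 * b as i32) >> 16) as i16;
//     assert_eq!(hi, -1); // the high word of -1 is 0xFFFF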

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhi_epi16&expand=3960)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm512_mask_mulhi_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhi_epi16&expand=3961)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhi_epi16&expand=3957)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm256_mask_mulhi_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhi_epi16&expand=3958)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhi_epi16&expand=3954)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhi_epi16&expand=3955)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32()))
}
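
// Illustrative sketch (not part of this crate): the scalar analogue of one
// lane of the vpmulhrsw operation above, written out as the Q15 fixed-point
// rounding multiply it implements.
//
//     let (a, b): (i16, i16) = (0x4000, 0x4000); // 0.5 * 0.5 in Q15
//     let r = ((((a as i32) * (b as i32) >> 14) + 1) >> 1) as i16;
//     assert_eq!(r, 0x2000); // 0.25 in Q15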

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mulhrs_epi16&expand=3984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm512_mask_mulhrs_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mulhrs_epi16&expand=3985)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm256_mask_mulhrs_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i16x32(), b.as_i16x32()))
}
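
// Illustrative sketch (not part of this crate): the scalar analogue of one
// lane of the low multiply above. Keeping the low 16 bits of the 32-bit
// product is the same as a wrapping 16-bit multiply.
//
//     let (a, b): (i16, i16) = (300, 300); // 300 * 300 = 90000 exceeds i16::MAX
//     assert_eq!(a.wrapping_mul(b), 24464); // 90000 mod 65536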

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm512_mask_mullo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullo_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mullo_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm256_mask_mullo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mullo_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mullo_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mullo_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mullo_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_u16x32();
    let b = b.as_u16x32();
    transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
}
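
// Illustrative sketch (not part of this crate): the comparison above is done
// on the unsigned interpretation of each lane, which matters once the top bit
// is set.
//
//     let (a, b): (u16, u16) = (0x8000, 0x0001);
//     assert_eq!(a.max(b), 0x8000); // as i16, 0x8000 is -32768 and would lose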

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu16&expand=3607)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, max, src.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu16&expand=3608)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, max, u16x32::ZERO))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu16&expand=3604)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, max, src.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu16&expand=3605)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, max, u16x16::ZERO))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu16&expand=3601)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, max, src.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu16&expand=3602)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, max, u16x8::ZERO))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, max, src.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, max, u8x64::ZERO))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, max, src.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, max, u8x32::ZERO))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, max, src.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, max, u8x16::ZERO))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi16&expand=3571)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, max, src.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi16&expand=3572)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, max, i16x32::ZERO))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi16&expand=3568)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, max, src.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi16&expand=3569)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, max, i16x16::ZERO))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi16&expand=3565)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, max, src.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi16&expand=3566)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, max, i16x8::ZERO))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi8&expand=3600)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi8&expand=3598)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, max, src.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi8&expand=3599)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, max, i8x64::ZERO))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi8&expand=3595)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, max, src.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi8&expand=3596)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, max, i8x32::ZERO))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi8&expand=3592)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, max, src.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi8&expand=3593)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, max, i8x16::ZERO))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu16&expand=3723)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_u16x32();
    let b = b.as_u16x32();
    transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
}
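
// Note on the turbofish: `simd_lt` yields a lane mask whose element type is
// the signed integer of the same width (`i16x32` here, all-ones for true,
// all-zeros for false), so `simd_select::<i16x32, _>` pins that mask type
// explicitly while letting the value type be inferred from `a` and `b`.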

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu16&expand=3721)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, min, src.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu16&expand=3722)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, min, u16x32::ZERO))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu16&expand=3718)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, min, src.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu16&expand=3719)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, min, u16x16::ZERO))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu16&expand=3715)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, min, src.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu16&expand=3716)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, min, u16x8::ZERO))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu8&expand=3750)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu8&expand=3748)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, min, src.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu8&expand=3749)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, min, u8x64::ZERO))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu8&expand=3745)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, min, src.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu8&expand=3746)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, min, u8x32::ZERO))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu8&expand=3742)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, min, src.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu8&expand=3743)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, min, u8x16::ZERO))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi16&expand=3687)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi16&expand=3685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, min, src.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi16&expand=3686)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, min, i16x32::ZERO))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi16&expand=3682)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, min, src.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, min, i16x16::ZERO))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, min, src.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, min, i16x8::ZERO))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, min, src.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, min, i8x64::ZERO))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, min, src.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, min, i8x32::ZERO))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, min, src.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, min, i8x16::ZERO))
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32()))
}
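
// `simd_bitmask` collapses the lane mask produced by `simd_lt` into a scalar
// bitmask: bit i of the returned `__mmask32` is set exactly when lane i of
// `a` is less than lane i of `b` (lane 0 maps to the least significant bit).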

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}
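
// The masked compare delegates to the generic `_mm512_mask_cmp_epu16_mask`
// with the `_MM_CMPINT_LT` predicate; the effect is equivalent to computing
// the unmasked comparison and AND-ing it with `k1`. A sketch of that
// equivalence (illustrative only):
//
//     // _mm512_mask_cmplt_epu16_mask(k1, a, b)
//     //     == _mm512_cmplt_epu16_mask(a, b) & k1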

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}
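
// `_MM_CMPINT_NLE` ("not less-than-or-equal") is the VPCMP predicate that
// encodes greater-than, which is why the masked cmpgt wrappers below forward
// to the generic cmp intrinsics with that constant rather than a dedicated
// greater-than predicate.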

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16_mask&expand=895)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi16_mask&expand=893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi8_mask&expand=921)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu16_mask&expand=989)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu16_mask&expand=990)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
}
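
// Likewise, the masked cmple wrappers forward the `_MM_CMPINT_LE` predicate
// to the generic cmp intrinsics; result bit i is set when lane i satisfies
// `a <= b` and bit i of `k1` is also set.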
2825
2826/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
2827///
2828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu16_mask&expand=987)
2829#[inline]
2830#[target_feature(enable = "avx512bw,avx512vl")]
2831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2832#[cfg_attr(test, assert_instr(vpcmp))]
2833pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2834    simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16()))
2835}
2836
2837/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu16_mask&expand=988)
2840#[inline]
2841#[target_feature(enable = "avx512bw,avx512vl")]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843#[cfg_attr(test, assert_instr(vpcmp))]
2844pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2845    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
2846}
2847
2848/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
2849///
2850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu16_mask&expand=985)
2851#[inline]
2852#[target_feature(enable = "avx512bw,avx512vl")]
2853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2854#[cfg_attr(test, assert_instr(vpcmp))]
2855pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2856    simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8()))
2857}
2858
2859/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2860///
2861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu16_mask&expand=986)
2862#[inline]
2863#[target_feature(enable = "avx512bw,avx512vl")]
2864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2865#[cfg_attr(test, assert_instr(vpcmp))]
2866pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2867    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
2868}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu8_mask&expand=1007)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu8_mask&expand=1005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu8_mask&expand=1003)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi16_mask&expand=965)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi16_mask&expand=963)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi16_mask&expand=961)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi8_mask&expand=983)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi8_mask&expand=981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi8_mask&expand=979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
}
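
// Illustrative sketch (not part of the upstream source): the epi8 and epu8
// variants interpret the same bit patterns differently. The helper name and
// the inputs below are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_cmple_signedness() {
    let a = _mm_set1_epi8(-1); // bit pattern 0xFF in every lane
    let b = _mm_set1_epi8(1);
    // Signed: -1 <= 1 holds, so all 16 mask bits are set.
    assert_eq!(_mm_cmple_epi8_mask(a, b), 0xFFFF);
    // Unsigned: 0xFF (255) <= 1 fails, so no mask bits are set.
    assert_eq!(_mm_cmple_epu8_mask(a, b), 0);
}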

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu16_mask&expand=867)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu16_mask&expand=865)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu16_mask&expand=863)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu8_mask&expand=885)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
}
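
// Illustrative sketch (not part of the upstream source): greater-than-or-equal
// is the "not less than" predicate, which is why the masked forms above pass
// _MM_CMPINT_NLT. The helper name and the inputs below are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_cmpge_epu16() {
    let a = _mm512_set1_epi16(5);
    let b = _mm512_set1_epi16(5);
    // Equal lanes satisfy >=, so the full 32-bit mask is produced.
    assert_eq!(_mm512_cmpge_epu16_mask(a, b), u32::MAX);
    // With a zeromask, only the k1-selected bits can survive.
    assert_eq!(_mm512_mask_cmpge_epu16_mask(0x8000_0001, a, b), 0x8000_0001);
}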

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu16_mask&expand=801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu8_mask&expand=819)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu8_mask&expand=817)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu8_mask&expand=815)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi16_mask&expand=771)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16_mask&expand=769)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi16_mask&expand=767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi8_mask&expand=795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8_mask&expand=793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi8_mask&expand=791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
}
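
// Illustrative sketch (not part of the upstream source): for equality the
// signed and unsigned variants agree, since equality is independent of how
// the lanes are interpreted. The helper name and inputs are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_cmpeq_epi8() {
    let a = _mm_set1_epi8(-1);
    let b = _mm_set1_epi8(-1);
    assert_eq!(_mm_cmpeq_epi8_mask(a, b), 0xFFFF);
    assert_eq!(_mm_cmpeq_epu8_mask(a, b), 0xFFFF);
}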

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu16_mask&expand=1106)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu16_mask&expand=1104)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu16_mask&expand=1102)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu8_mask&expand=1124)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu8_mask&expand=1122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu8_mask&expand=1120)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi16_mask&expand=1082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
}
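
// Illustrative sketch (not part of the upstream source): cmpneq is the
// bitwise complement of cmpeq over the same inputs, so the two masks
// partition the lanes. The helper name and inputs are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_cmpneq_epi16() {
    let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_set1_epi16(3);
    let eq = _mm_cmpeq_epi16_mask(a, b);
    let ne = _mm_cmpneq_epi16_mask(a, b);
    // Together the two masks cover all 8 lanes exactly once.
    assert_eq!(eq ^ ne, 0xFF);
}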
3859
3860/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
3861///
3862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu16_mask&expand=715)
3863#[inline]
3864#[target_feature(enable = "avx512bw")]
3865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3866#[rustc_legacy_const_generics(2)]
3867#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3868pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
3869    static_assert_uimm_bits!(IMM8, 3);
3870    let a = a.as_u16x32();
3871    let b = b.as_u16x32();
3872    let r = match IMM8 {
3873        0 => simd_eq(a, b),
3874        1 => simd_lt(a, b),
3875        2 => simd_le(a, b),
3876        3 => i16x32::ZERO,
3877        4 => simd_ne(a, b),
3878        5 => simd_ge(a, b),
3879        6 => simd_gt(a, b),
3880        _ => i16x32::splat(-1),
3881    };
3882    simd_bitmask(r)
3883}
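
// A minimal usage sketch (illustrative only; the `example_*` helpers in this
// file are not part of stdarch, and a real caller must first verify CPU
// support, e.g. with `is_x86_feature_detected!("avx512bw")` in std). IMM8
// picks the predicate: here `_MM_CMPINT_LT` (1) requests an unsigned
// less-than, so with every lane of `a` below `b` all 32 mask bits are set.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_cmp_epu16_lt() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    let m: __mmask32 = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
    assert_eq!(m, u32::MAX);
}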

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu16_mask&expand=716)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u16x32();
    let b = b.as_u16x32();
    let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i16x32::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}
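
// A sketch of the zeromask interaction (illustrative only): mask bits that
// are clear in `k1` stay clear in the result, even for the always-true
// predicate `_MM_CMPINT_TRUE`, which is why the `_` arm above returns `k1`
// rather than an all-ones vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_mask_cmp_epu16_true() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    let m = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_TRUE>(0b1010, a, b);
    assert_eq!(m, 0b1010);
}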

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u16x16();
    let b = b.as_u16x16();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i16x16::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i16x16::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u16x16();
    let b = b.as_u16x16();
    let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i16x16::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i16x8::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i16x8::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i16x8::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu8_mask&expand=733)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i8x64::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i8x64::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i8x64::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i8x32::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i8x32::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i8x32::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i8x16::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i8x16::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i8x16::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi16_mask&expand=691)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i16x32::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i16x32::splat(-1),
    };
    simd_bitmask(r)
}
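
// A signedness sketch (illustrative only): the lane value -1 is below 1 as a
// signed 16-bit integer, but the same bit pattern (0xFFFF) is the largest
// unsigned value, so the epi16 and epu16 predicates disagree on it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_cmp_epi16_signedness() {
    let a = _mm512_set1_epi16(-1);
    let b = _mm512_set1_epi16(1);
    assert_eq!(_mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b), u32::MAX);
    assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b), 0);
}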

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i16x32::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i16x16::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i16x16::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i16x16::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i16x8::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i16x8::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i16x8::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi8_mask&expand=709)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i8x64::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i8x64::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i8x64::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i8x32();
    let b = b.as_i8x32();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i8x32::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i8x32::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi8_mask&expand=708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i8x32();
    let b = b.as_i8x32();
    let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i8x32::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi8_mask&expand=705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    let r = match IMM8 {
        0 => simd_eq(a, b),
        1 => simd_lt(a, b),
        2 => simd_le(a, b),
        3 => i8x16::ZERO,
        4 => simd_ne(a, b),
        5 => simd_ge(a, b),
        6 => simd_gt(a, b),
        _ => i8x16::splat(-1),
    };
    simd_bitmask(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    static_assert_uimm_bits!(IMM8, 3);
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO);
    let r = match IMM8 {
        0 => simd_and(k1, simd_eq(a, b)),
        1 => simd_and(k1, simd_lt(a, b)),
        2 => simd_and(k1, simd_le(a, b)),
        3 => i8x16::ZERO,
        4 => simd_and(k1, simd_ne(a, b)),
        5 => simd_and(k1, simd_ge(a, b)),
        6 => simd_and(k1, simd_gt(a, b)),
        _ => k1,
    };
    simd_bitmask(r)
}

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
    simd_reduce_add_unordered(a.as_i16x16())
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
    simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO))
}
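
// A sketch of the masked reduction (illustrative only): inactive lanes are
// replaced by the additive identity 0 before summing, and like the unmasked
// form the sum wraps on i16 overflow.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_reduce_add_epi16() {
    let a = _mm256_set1_epi16(3);
    // Only the low four of the sixteen lanes are active: 4 * 3 = 12.
    assert_eq!(_mm256_mask_reduce_add_epi16(0b1111, a), 12);
}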

/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
    simd_reduce_add_unordered(a.as_i16x8())
}

/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
    simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO))
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
    simd_reduce_add_unordered(a.as_i8x32())
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
    simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO))
}

/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
    simd_reduce_add_unordered(a.as_i8x16())
}

/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_add_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
    simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO))
}

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
    simd_reduce_and(a.as_i16x16())
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
    simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i16x16(),
        _mm256_set1_epi64x(-1).as_i16x16(),
    ))
}
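
// A sketch (illustrative only): bitwise AND needs the all-ones identity,
// which is why inactive lanes above are filled from `_mm256_set1_epi64x(-1)`
// rather than zero; a zero fill would collapse every masked reduction to 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_reduce_and_epi16() {
    let a = _mm256_set1_epi16(0b0110);
    assert_eq!(_mm256_mask_reduce_and_epi16(0b11, a), 0b0110);
}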

/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
    simd_reduce_and(a.as_i16x8())
}

/// Reduce the packed 16-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
    simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i16x8(),
        _mm_set1_epi64x(-1).as_i16x8(),
    ))
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
    simd_reduce_and(a.as_i8x32())
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
    simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i8x32(),
        _mm256_set1_epi64x(-1).as_i8x32(),
    ))
}

/// Reduce the packed 8-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
    simd_reduce_and(a.as_i8x16())
}

/// Reduce the packed 8-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_and_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
    simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i8x16(),
        _mm_set1_epi64x(-1).as_i8x16(),
    ))
}

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
    simd_reduce_max(a.as_i16x16())
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
    simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768)))
}
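
// A sketch (illustrative only): the identity for a signed max is i16::MIN
// (the -32768 fill above), so a fully masked-off reduction returns i16::MIN
// rather than 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_reduce_max_epi16() {
    let a = _mm256_set1_epi16(7);
    assert_eq!(_mm256_mask_reduce_max_epi16(0, a), i16::MIN);
    assert_eq!(_mm256_mask_reduce_max_epi16(0b1, a), 7);
}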

/// Reduce the packed 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
    simd_reduce_max(a.as_i16x8())
}

/// Reduce the packed 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
    simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768)))
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
    simd_reduce_max(a.as_i8x32())
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
    simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128)))
}

/// Reduce the packed 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
    simd_reduce_max(a.as_i8x16())
}

/// Reduce the packed 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
    simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128)))
}

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
    simd_reduce_max(a.as_u16x16())
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
    simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO))
}
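
// A sketch (illustrative only): for an unsigned max the identity is 0, so a
// masked-off lane can never win the comparison.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_reduce_max_epu16() {
    let a = _mm256_set1_epi16(-1); // every lane is 0xFFFF when read unsigned
    assert_eq!(_mm256_mask_reduce_max_epu16(0, a), 0);
    assert_eq!(_mm256_mask_reduce_max_epu16(0b10, a), u16::MAX);
}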

/// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
    simd_reduce_max(a.as_u16x8())
}

/// Reduce the packed unsigned 16-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
    simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO))
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
    simd_reduce_max(a.as_u8x32())
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
    simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO))
}

/// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
    simd_reduce_max(a.as_u8x16())
}

/// Reduce the packed unsigned 8-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_max_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
    simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO))
}

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
    simd_reduce_min(a.as_i16x16())
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
    simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff)))
}
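
// A sketch (illustrative only): the signed-min identity is i16::MAX (the
// 0x7fff fill above), the mirror image of the max case.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_reduce_min_epi16() {
    let a = _mm256_set1_epi16(-5);
    assert_eq!(_mm256_mask_reduce_min_epi16(0, a), i16::MAX);
    assert_eq!(_mm256_mask_reduce_min_epi16(0b1, a), -5);
}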

/// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
    simd_reduce_min(a.as_i16x8())
}

/// Reduce the packed 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
    simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff)))
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
    simd_reduce_min(a.as_i8x32())
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
    simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f)))
}

/// Reduce the packed 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
    simd_reduce_min(a.as_i8x16())
}

/// Reduce the packed 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
    simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f)))
}

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
    simd_reduce_min(a.as_u16x16())
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
    simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff)))
}
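
// A sketch (illustrative only): the unsigned-min identity is the all-ones
// value 0xffff, so an empty mask yields u16::MAX.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_reduce_min_epu16() {
    let a = _mm256_set1_epi16(9);
    assert_eq!(_mm256_mask_reduce_min_epu16(0, a), u16::MAX);
    assert_eq!(_mm256_mask_reduce_min_epu16(0b1, a), 9);
}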

/// Reduce the packed unsigned 16-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
    simd_reduce_min(a.as_u16x8())
}

/// Reduce the packed unsigned 16-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
    simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff)))
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
    simd_reduce_min(a.as_u8x32())
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
    simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff)))
}

/// Reduce the packed unsigned 8-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
    simd_reduce_min(a.as_u8x16())
}

/// Reduce the packed unsigned 8-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_min_epu8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
    simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff)))
}

/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
    simd_reduce_mul_unordered(a.as_i16x16())
}

/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
    simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)))
}
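
// A sketch (illustrative only): multiplication fills inactive lanes with the
// identity 1, and the product wraps on i16 overflow.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_reduce_mul_epi16() {
    let a = _mm256_set1_epi16(2);
    // Three active lanes give 2 * 2 * 2 = 8; the rest contribute a factor of 1.
    assert_eq!(_mm256_mask_reduce_mul_epi16(0b111, a), 8);
}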
5035
5036/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
5037///
5038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi16)
5039#[inline]
5040#[target_feature(enable = "avx512bw,avx512vl")]
5041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5042pub unsafe fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5043    simd_reduce_mul_unordered(a.as_i16x8())
5044}
5045
5046/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5047///
5048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi16)
5049#[inline]
5050#[target_feature(enable = "avx512bw,avx512vl")]
5051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5052pub unsafe fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
5053    simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)))
5054}
5055
5056/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5057///
5058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_mul_epi8)
5059#[inline]
5060#[target_feature(enable = "avx512bw,avx512vl")]
5061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5062pub unsafe fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
5063    simd_reduce_mul_unordered(a.as_i8x32())
5064}
5065
5066/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5067///
5068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_mul_epi8)
5069#[inline]
5070#[target_feature(enable = "avx512bw,avx512vl")]
5071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5072pub unsafe fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
5073    simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)))
5074}
5075
5076/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
5077///
5078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_mul_epi8)
5079#[inline]
5080#[target_feature(enable = "avx512bw,avx512vl")]
5081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5082pub unsafe fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
5083    simd_reduce_mul_unordered(a.as_i8x16())
5084}
5085
5086/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_mul_epi8)
5089#[inline]
5090#[target_feature(enable = "avx512bw,avx512vl")]
5091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5092pub unsafe fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
5093    simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)))
5094}
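
// Illustrative sketch (the helper name is not part of the original source):
// here the masked-off lanes are replaced with 1, the multiplicative identity,
// so excluding a lane never changes the product of the active lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_mask_reduce_mul_epi16() {
    let a = _mm_setr_epi16(2, 3, 4, 100, 100, 100, 100, 100);
    // Only the low three lanes are active: 2 * 3 * 4 = 24.
    assert_eq!(_mm_mask_reduce_mul_epi16(0b0000_0111, a), 24);
}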

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
    simd_reduce_or(a.as_i16x16())
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
    simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO))
}

/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
    simd_reduce_or(a.as_i16x8())
}

/// Reduce the packed 16-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
    simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO))
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
    simd_reduce_or(a.as_i8x32())
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
    simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO))
}

/// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
    simd_reduce_or(a.as_i8x16())
}

/// Reduce the packed 8-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_or_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
    simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO))
}
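
// Illustrative sketch (the helper name is not part of the original source):
// inactive lanes contribute 0, the identity for OR, so only active lanes can
// set bits in the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_mask_reduce_or_epi16() {
    let a = _mm_setr_epi16(0b0001, 0b0010, 0b0100, -1, -1, -1, -1, -1);
    // The all-ones lanes are masked off; only the low three lanes are ORed.
    assert_eq!(_mm_mask_reduce_or_epi16(0b0000_0111, a), 0b0111);
}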

/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi16&expand=3368)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}
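
// Illustrative sketch (the helper name is not part of the original source):
// the pointer only needs to be valid for the full-width read; no vector or
// even element alignment is required, as the deliberate one-element offset shows.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_mm512_loadu_epi16() {
    let buf: [i16; 33] = [7; 33];
    // Start one element into the array, so the address is at best 2-byte aligned.
    let v = _mm512_loadu_epi16(buf.as_ptr().add(1));
    assert_eq!(_mm512_reduce_add_epi16(v), 224); // 32 lanes of 7
}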

/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi16&expand=5622)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu16
pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu8
pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
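
// Illustrative sketch (the helper name is not part of the original source):
// an unaligned store round-trips through a byte buffer at an address that is
// misaligned on purpose.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_mm512_storeu_epi8() {
    let v = _mm512_set1_epi8(42);
    let mut buf = [0i8; 65];
    // Store starting one byte into the buffer.
    _mm512_storeu_epi8(buf.as_mut_ptr().add(1), v);
    assert_eq!(buf[0], 0);
    assert!(buf[1..].iter().all(|&b| b == 42));
}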

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    transmute(loaddqu16_512(mem_addr, src.as_i16x32(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    transmute(loaddqu8_512(mem_addr, src.as_i8x64(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    transmute(loaddqu16_256(mem_addr, src.as_i16x16(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    transmute(loaddqu8_256(mem_addr, src.as_i8x32(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    transmute(loaddqu16_128(mem_addr, src.as_i16x8(), k))
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    transmute(loaddqu8_128(mem_addr, src.as_i8x16(), k))
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
}
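
// Illustrative sketch (the helper name is not part of the original source):
// with a zeromask, masked-off lanes come back as zero. The hardware masked
// load also suppresses faults on masked-off elements, which is why these
// wrappers go through a dedicated intrinsic rather than ptr::read_unaligned.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_maskz_loadu_epi16() {
    let buf: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let v = _mm_maskz_loadu_epi16(0b0000_0011, buf.as_ptr());
    let expected = _mm_setr_epi16(1, 2, 0, 0, 0, 0, 0, 0);
    // All 16 bytes compare equal when every lane matches.
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(v, expected)), 0xffff);
}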

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    storedqu16_512(mem_addr, a.as_i16x32(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    storedqu8_512(mem_addr, a.as_i8x64(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    storedqu16_256(mem_addr, a.as_i16x16(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    storedqu8_256(mem_addr, a.as_i8x32(), mask)
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    storedqu16_128(mem_addr, a.as_i16x8(), mask)
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    storedqu8_128(mem_addr, a.as_i8x16(), mask)
}
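
// Illustrative sketch (the helper name is not part of the original source):
// only the lanes whose mask bit is set are written; the rest of the buffer is
// left untouched rather than zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_mask_storeu_epi16() {
    let mut buf: [i16; 8] = [-1; 8];
    let v = _mm_set1_epi16(7);
    // Write lanes 0 and 7 only.
    _mm_mask_storeu_epi16(buf.as_mut_ptr(), 0b1000_0001, v);
    assert_eq!(buf, [7, -1, -1, -1, -1, -1, -1, 7]);
}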

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_madd_epi16&expand=3511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm512_mask_madd_epi16(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let madd = _mm512_madd_epi16(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let madd = _mm512_madd_epi16(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let madd = _mm256_madd_epi16(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let madd = _mm256_madd_epi16(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_madd_epi16&expand=3506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_madd_epi16(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_madd_epi16&expand=3507)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_madd_epi16(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
}
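
// Illustrative sketch (the helper name is not part of the original source):
// each i32 result lane covers two adjacent i16 input lanes, so the writemask
// is per 32-bit result, not per 16-bit input.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_mask_madd_epi16() {
    let a = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
    let b = _mm_setr_epi16(10, 20, 30, 40, 0, 0, 0, 0);
    let src = _mm_set1_epi32(-1);
    // Unmasked result would be [1*10 + 2*20, 3*30 + 4*40, 0, 0] = [50, 250, 0, 0];
    // with writemask 0b0001 only lane 0 is computed, the rest keep src.
    let r = _mm_mask_madd_epi16(src, 0b0000_0001, a, b);
    let expected = _mm_setr_epi32(50, -1, -1, -1);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xffff);
}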

/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maddubs_epi16&expand=3539)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_maddubs_epi16&expand=3540)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm512_mask_maddubs_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_maddubs_epi16&expand=3541)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, madd, i16x32::ZERO))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_maddubs_epi16&expand=3537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm256_mask_maddubs_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_maddubs_epi16&expand=3538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, madd, i16x16::ZERO))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_maddubs_epi16&expand=3534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_maddubs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_maddubs_epi16&expand=3535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_maddubs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, madd, i16x8::ZERO))
}
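
// Illustrative sketch (the helper name is not part of the original source):
// a is interpreted as unsigned bytes and b as signed bytes, so the 0xff bit
// pattern in a counts as 255, not -1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_maskz_maddubs_epi16() {
    let a = _mm_set1_epi8(-1); // read as u8: 255
    let b = _mm_set1_epi8(1);
    // Each i16 lane sums two adjacent products: 255 + 255 = 510 (no saturation
    // here); the zeromask keeps only lane 0.
    let r = _mm_maskz_maddubs_epi16(0b0000_0001, a, b);
    let expected = _mm_setr_epi16(510, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, expected)), 0xffff);
}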

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi32&expand=4091)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpackssdw(a.as_i32x16(), b.as_i32x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi32&expand=4089)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm512_mask_packs_epi32(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packs_epi32(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi32&expand=4090)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packs_epi32(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi32&expand=4086)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm256_mask_packs_epi32(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packs_epi32(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi32&expand=4087)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packs_epi32(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi32&expand=4083)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi32(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi32&expand=4084)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi32(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
}
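
// Illustrative sketch (the helper name is not part of the original source):
// out-of-range inputs clamp to i16::MAX / i16::MIN under signed saturation;
// in the 128-bit form, a supplies the low four result lanes and b the high four.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_maskz_packs_epi32() {
    let a = _mm_setr_epi32(100_000, -100_000, 1, 2);
    let b = _mm_set1_epi32(3);
    let r = _mm_maskz_packs_epi32(0b1111_1111, a, b);
    let expected = _mm_setr_epi16(i16::MAX, i16::MIN, 1, 2, 3, 3, 3, 3);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, expected)), 0xffff);
}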

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packs_epi16&expand=4082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpacksswb(a.as_i16x32(), b.as_i16x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packs_epi16&expand=4080)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm512_mask_packs_epi16(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packs_epi16(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packs_epi16&expand=4081)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packs_epi16(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packs_epi16&expand=4077)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm256_mask_packs_epi16(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packs_epi16(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packs_epi16&expand=4078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packs_epi16(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packs_epi16&expand=4074)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi16(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packs_epi16&expand=4075)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi16(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
}
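
// Illustrative sketch (the helper name is not part of the original source):
// the same signed saturation, one level narrower: i16 inputs clamp to the
// i8 range.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_maskz_packs_epi16() {
    // 300 and -300 are out of i8 range, so they saturate to i8::MAX / i8::MIN.
    let a = _mm_setr_epi16(300, -300, 1, 2, 3, 4, 5, 6);
    let b = _mm_set1_epi16(7);
    let r = _mm_maskz_packs_epi16(0xffff, a, b);
    let expected = _mm_setr_epi8(
        i8::MAX,
        i8::MIN,
        1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7,
    );
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, expected)), 0xffff);
}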

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi32&expand=4130)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpackusdw(a.as_i32x16(), b.as_i32x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi32&expand=4128)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm512_mask_packus_epi32(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packus_epi32(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi32&expand=4129)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packus_epi32(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi32&expand=4125)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm256_mask_packus_epi32(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packus_epi32(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi32&expand=4126)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packus_epi32(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi32&expand=4122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi32(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi32&expand=4123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi32(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
}
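
// Illustrative sketch (the helper name is not part of the original source):
// under unsigned saturation, negative inputs clamp to 0 and values above
// u16::MAX clamp to 0xFFFF, even though the inputs are signed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_maskz_packus_epi32() {
    let a = _mm_setr_epi32(-5, 70_000, 1, 2);
    let b = _mm_set1_epi32(3);
    let r = _mm_maskz_packus_epi32(0b1111_1111, a, b);
    // 70_000 saturates to 0xFFFF, which is -1 when reinterpreted as i16.
    let expected = _mm_setr_epi16(0, -1, 1, 2, 3, 3, 3, 3);
    assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi16(r, expected)), 0xffff);
}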

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_packus_epi16&expand=4121)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpackuswb(a.as_i16x32(), b.as_i16x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_packus_epi16&expand=4119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm512_mask_packus_epi16(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packus_epi16(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_packus_epi16&expand=4120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packus_epi16(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_packus_epi16&expand=4116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm256_mask_packus_epi16(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packus_epi16(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_packus_epi16&expand=4117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packus_epi16(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_packus_epi16&expand=4113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi16(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_packus_epi16&expand=4114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi16(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
}
6088
6089/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
6090///
6091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu16&expand=388)
6092#[inline]
6093#[target_feature(enable = "avx512bw")]
6094#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6095#[cfg_attr(test, assert_instr(vpavgw))]
6096pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
6097    let a = simd_cast::<_, u32x32>(a.as_u16x32());
6098    let b = simd_cast::<_, u32x32>(b.as_u16x32());
6099    let r = simd_shr(simd_add(simd_add(a, b), u32x32::splat(1)), u32x32::splat(1));
6100    transmute(simd_cast::<_, u16x32>(r))
6101}
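
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): the average rounds up, i.e. each lane is (a + b + 1) >> 1 computed
// without intermediate overflow. Assumes a CPU with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_avg_epu16_rounds_up() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // (1 + 2 + 1) >> 1 == 2 in every lane, not the 1 truncating division gives.
    let lanes: [u16; 32] = transmute(_mm512_avg_epu16(a, b));
    assert_eq!(lanes, [2u16; 32]);
}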

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu16&expand=389)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu16&expand=390)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu16&expand=386)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu16&expand=387)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu16&expand=383)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu16&expand=384)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    let a = simd_cast::<_, u16x64>(a.as_u8x64());
    let b = simd_cast::<_, u16x64>(b.as_u8x64());
    let r = simd_shr(simd_add(simd_add(a, b), u16x64::splat(1)), u16x64::splat(1));
    transmute(simd_cast::<_, u8x64>(r))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_avg_epu8&expand=398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
}
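
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): with a writemask, lanes whose mask bit is clear keep the value from
// `src` rather than being zeroed. Assumes a CPU with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_mask_avg_epu8() {
    let src = _mm512_set1_epi8(9);
    let a = _mm512_set1_epi8(10);
    let b = _mm512_set1_epi8(11);
    // Only lane 0 receives the rounded average (10 + 11 + 1) >> 1 == 11.
    let r = _mm512_mask_avg_epu8(src, 1, a, b);
    let lanes: [u8; 64] = transmute(r);
    assert_eq!(lanes[0], 11);
    assert!(lanes[1..].iter().all(|&x| x == 9));
}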

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_avg_epu8&expand=399)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_avg_epu8&expand=395)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_avg_epu8&expand=396)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_avg_epu8&expand=392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_avg_epu8&expand=393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsllw(a.as_i16x32(), count.as_i16x8()))
}
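
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): the shift count is taken from the low 64 bits of `count` and
// applied uniformly to all lanes; counts of 16 or more zero every lane.
// Assumes a CPU with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_sll_epi16() {
    let a = _mm512_set1_epi16(3);
    let count = _mm_set_epi64x(0, 4); // shift every lane left by 4
    let lanes: [u16; 32] = transmute(_mm512_sll_epi16(a, count));
    assert_eq!(lanes, [48u16; 32]); // 3 << 4
}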

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi16&expand=5269)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm512_mask_sll_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_sll_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi16&expand=5270)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_sll_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi16&expand=5266)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm256_mask_sll_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m128i,
) -> __m256i {
    let shf = _mm256_sll_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi16&expand=5267)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    let shf = _mm256_sll_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi16&expand=5263)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sll_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi16&expand=5264)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sll_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi16&expand=5301)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm512_setzero_si512()
    } else {
        transmute(simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
    }
}
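
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): the count is a const generic, and counts of 16 or more produce an
// all-zero vector, matching the branch above. Assumes a CPU with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_slli_epi16() {
    let a = _mm512_set1_epi16(3);
    let shifted: [u16; 32] = transmute(_mm512_slli_epi16::<4>(a));
    assert_eq!(shifted, [48u16; 32]); // 3 << 4
    let zeroed: [u16; 32] = transmute(_mm512_slli_epi16::<16>(a));
    assert_eq!(zeroed, [0u16; 32]); // a count >= 16 clears every lane
}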

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_slli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = if IMM8 >= 16 {
        u16x32::ZERO
    } else {
        simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm512_setzero_si512()
    } else {
        let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16));
        transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_slli_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = if IMM8 >= 16 {
        u16x16::ZERO
    } else {
        simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u16x16()))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm256_setzero_si256()
    } else {
        let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16));
        transmute(simd_select_bitmask(k, shf, u16x16::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_slli_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = if IMM8 >= 16 {
        u16x8::ZERO
    } else {
        simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u16x8()))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm_setzero_si128()
    } else {
        let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16));
        transmute(simd_select_bitmask(k, shf, u16x8::ZERO))
    }
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi16&expand=5333)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsllvw(a.as_i16x32(), count.as_i16x32()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi16&expand=5331)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm512_mask_sllv_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_sllv_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi16&expand=5332)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_sllv_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi16&expand=5328)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm256_mask_sllv_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    let shf = _mm256_sllv_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi16&expand=5329)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    let shf = _mm256_sllv_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8()))
}
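
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): unlike the uniform-count shifts, each lane is shifted by the count
// in the corresponding lane of `count`, and a per-lane count of 16 or more
// zeroes that lane only. Assumes a CPU with avx512bw and avx512vl.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_sllv_epi16() {
    let a = _mm_set1_epi16(1);
    let count = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 15, 16);
    let lanes: [u16; 8] = transmute(_mm_sllv_epi16(a, count));
    assert_eq!(lanes, [1, 2, 4, 8, 16, 32, 0x8000, 0]);
}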

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi16&expand=5325)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm_mask_sllv_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf = _mm_sllv_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi16&expand=5326)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sllv_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi16&expand=5483)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrlw(a.as_i16x32(), count.as_i16x8()))
}
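
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): the logical right shift fills with zeros, so the sign bit is not
// propagated (contrast with _mm512_sra_epi16 further below). Assumes a CPU
// with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srl_epi16() {
    let a = _mm512_set1_epi16(i16::MIN); // 0x8000 in every lane
    let count = _mm_set_epi64x(0, 1);
    let lanes: [u16; 32] = transmute(_mm512_srl_epi16(a, count));
    assert_eq!(lanes, [0x4000u16; 32]); // a zero is shifted into the top bit
}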

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi16&expand=5481)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm512_mask_srl_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_srl_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi16&expand=5482)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_srl_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi16&expand=5478)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm256_mask_srl_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m128i,
) -> __m256i {
    let shf = _mm256_srl_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi16&expand=5479)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    let shf = _mm256_srl_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi16&expand=5475)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srl_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi16&expand=5476)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srl_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi16&expand=5513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    if IMM8 >= 16 {
        _mm512_setzero_si512()
    } else {
        transmute(simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)))
    }
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_srli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = if IMM8 >= 16 {
        u16x32::ZERO
    } else {
        simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16))
    };
    transmute(simd_select_bitmask(k, shf, src.as_u16x32()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_srli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    // IMM8 is unsigned (u32), matching _mm512_srli_epi16, _mm512_mask_srli_epi16,
    // and the unsigned immediate in Intel's documentation.
    if IMM8 >= 16 {
        _mm512_setzero_si512()
    } else {
        let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16));
        transmute(simd_select_bitmask(k, shf, u16x32::ZERO))
    }
}
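
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): combines a constant logical right shift with a zeromask, so
// unselected lanes come out as zero. Assumes a CPU with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_maskz_srli_epi16() {
    let a = _mm512_set1_epi16(-1); // 0xFFFF in every lane
    // Shift right by 8, keeping only lanes 0 and 1.
    let r = _mm512_maskz_srli_epi16::<8>(0b11, a);
    let lanes: [u16; 32] = transmute(r);
    assert_eq!(lanes[0], 0x00FF);
    assert_eq!(lanes[1], 0x00FF);
    assert!(lanes[2..].iter().all(|&x| x == 0));
}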

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi16&expand=5508)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_srli_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_srli_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi16&expand=5509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm256_srli_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi16&expand=5505)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_srli_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_srli_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi16&expand=5506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = _mm_srli_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi16&expand=5545)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi16&expand=5543)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm512_mask_srlv_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srlv_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi16&expand=5544)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srlv_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi16&expand=5540)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm256_mask_srlv_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    let shf = _mm256_srlv_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi16&expand=5541)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    let shf = _mm256_srlv_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8()))
}
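
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): per-lane logical right shifts; a count of 16 or more zeroes that
// lane. Assumes a CPU with avx512bw and avx512vl.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_srlv_epi16() {
    let a = _mm_set1_epi16(-1); // 0xFFFF in every lane
    let count = _mm_setr_epi16(0, 4, 8, 12, 16, 1, 2, 3);
    let lanes: [u16; 8] = transmute(_mm_srlv_epi16(a, count));
    assert_eq!(
        lanes,
        [0xFFFF, 0x0FFF, 0x00FF, 0x000F, 0, 0x7FFF, 0x3FFF, 0x1FFF]
    );
}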

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi16&expand=5537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm_mask_srlv_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf = _mm_srlv_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi16&expand=5538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srlv_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi16&expand=5398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsraw(a.as_i16x32(), count.as_i16x8()))
}
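
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): the arithmetic right shift replicates the sign bit, so negative
// lanes stay negative (contrast with the zero-filling _mm512_srl_epi16).
// Assumes a CPU with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_sra_epi16() {
    let a = _mm512_set1_epi16(-8);
    let count = _mm_set_epi64x(0, 2);
    let lanes: [i16; 32] = transmute(_mm512_sra_epi16(a, count));
    assert_eq!(lanes, [-2i16; 32]); // -8 >> 2 with sign fill
}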

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi16&expand=5396)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm512_mask_sra_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_sra_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi16&expand=5397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_sra_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi16&expand=5393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm256_mask_sra_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m128i,
) -> __m256i {
    let shf = _mm256_sra_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi16&expand=5394)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    let shf = _mm256_sra_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi16&expand=5390)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sra_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi16&expand=5391)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sra_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
}
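
// Illustrative sketch (hypothetical helper, not part of the crate's test
// suite): unlike the logical srli, an arithmetic shift count of 16 or more is
// clamped to 15, so every lane collapses to its sign (0 or -1) rather than to
// zero. Assumes a CPU with avx512bw.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srai_epi16_clamps() {
    let a = _mm512_set1_epi16(-4);
    let lanes: [i16; 32] = transmute(_mm512_srai_epi16::<255>(a));
    assert_eq!(lanes, [-1i16; 32]); // count clamped to 15; the sign bit fills
}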

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_srai_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_srai_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, r, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, r, i16x16::ZERO))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_srai_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, r, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
    transmute(simd_select_bitmask(k, r, i16x8::ZERO))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsravw(a.as_i16x32(), count.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi16&expand=5454)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm512_mask_srav_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srav_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi16&expand=5455)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srav_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    transmute(vpsravw256(a.as_i16x16(), count.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi16&expand=5451)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm256_mask_srav_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    let shf = _mm256_srav_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi16&expand=5452)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    let shf = _mm256_srav_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(vpsravw128(a.as_i16x8(), count.as_i16x8()))
}
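
// Per-lane shift sketch (illustrative only; the helper name is
// hypothetical): unlike `_mm_sra_epi16`, each lane uses its own count
// from the corresponding element of `count`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _srav_epi16_sketch() -> __m128i {
    let a = _mm_set1_epi16(-64);
    let count = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    // yields -64, -32, -16, -8, -4, -2, -1, -1 (-64 >> 7 rounds toward -inf)
    _mm_srav_epi16(a, count)
}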

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi16&expand=5448)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm_mask_srav_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf = _mm_srav_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi16&expand=5449)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srav_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi16&expand=4223)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub unsafe fn _mm512_mask_permutex2var_epi16(
    a: __m512i,
    k: __mmask32,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
    transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi16&expand=4225)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm512_maskz_permutex2var_epi16(
    k: __mmask32,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
    transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi16&expand=4224)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub unsafe fn _mm512_mask2_permutex2var_epi16(
    a: __m512i,
    idx: __m512i,
    k: __mmask32,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
    transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi16&expand=4219)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub unsafe fn _mm256_mask_permutex2var_epi16(
    a: __m256i,
    k: __mmask16,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
    transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi16&expand=4221)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm256_maskz_permutex2var_epi16(
    k: __mmask16,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
    transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi16&expand=4220)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub unsafe fn _mm256_mask2_permutex2var_epi16(
    a: __m256i,
    idx: __m256i,
    k: __mmask16,
    b: __m256i,
) -> __m256i {
    let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
    transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8()))
}
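
// Two-table shuffle sketch (illustrative; the helper name is hypothetical).
// For the 128-bit form, index bits 2:0 select a lane and bit 3 selects the
// table (a when clear, b when set), so this interleaves the two inputs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _permutex2var_epi16_sketch() -> __m128i {
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
    let idx = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
    _mm_permutex2var_epi16(a, idx, b) // 0, 8, 1, 9, 2, 10, 3, 11
}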

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi16&expand=4215)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub unsafe fn _mm_mask_permutex2var_epi16(
    a: __m128i,
    k: __mmask8,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
    transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi16&expand=4217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm_maskz_permutex2var_epi16(
    k: __mmask8,
    a: __m128i,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
    transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi16&expand=4216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub unsafe fn _mm_mask2_permutex2var_epi16(
    a: __m128i,
    idx: __m128i,
    k: __mmask8,
    b: __m128i,
) -> __m128i {
    let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
    transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermw(a.as_i16x32(), idx.as_i16x32()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi16&expand=4293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm512_mask_permutexvar_epi16(
    src: __m512i,
    k: __mmask32,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
    transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi16&expand=4294)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
    let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
    transmute(simd_select_bitmask(k, permute, i16x32::ZERO))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    transmute(vpermw256(a.as_i16x16(), idx.as_i16x16()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi16&expand=4290)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm256_mask_permutexvar_epi16(
    src: __m256i,
    k: __mmask16,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
    transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi16&expand=4291)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
    let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
    transmute(simd_select_bitmask(k, permute, i16x16::ZERO))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    transmute(vpermw128(a.as_i16x8(), idx.as_i16x8()))
}
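
// Lane-permute sketch (illustrative; the helper name is hypothetical):
// dst[i] is a[idx[i]], so a descending index vector reverses the lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _permutexvar_epi16_sketch() -> __m128i {
    let a = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
    let idx = _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0);
    _mm_permutexvar_epi16(idx, a) // 17, 16, 15, 14, 13, 12, 11, 10
}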

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutexvar_epi16&expand=4287)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm_mask_permutexvar_epi16(
    src: __m128i,
    k: __mmask8,
    idx: __m128i,
    a: __m128i,
) -> __m128i {
    let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
    transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutexvar_epi16&expand=4288)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
    transmute(simd_select_bitmask(k, permute, i16x8::ZERO))
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi16&expand=430)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32()))
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi16&expand=429)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub unsafe fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16()))
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi16&expand=427)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub unsafe fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8()))
}
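
// Blend sketch (illustrative; the helper name is hypothetical): a set mask
// bit picks the lane from b, a clear bit picks it from a.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _mask_blend_epi16_sketch() -> __m128i {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(2);
    _mm_mask_blend_epi16(0b0101_0101, a, b) // lanes: 2, 1, 2, 1, 2, 1, 2, 1
}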

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi8&expand=441)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64()))
}
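
// Byte-granularity sketch (illustrative; the helper name is hypothetical):
// with 64 byte lanes the mask widens to __mmask64, one bit per byte.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _mask_blend_epi8_sketch() -> __m512i {
    let a = _mm512_set1_epi8(1);
    let b = _mm512_set1_epi8(2);
    _mm512_mask_blend_epi8(0x5555_5555_5555_5555, a, b) // even bytes from b
}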

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi8&expand=440)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub unsafe fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32()))
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi8&expand=439)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastw_epi16&expand=587)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    let a = _mm512_castsi128_si512(a).as_i16x32();
    let ret: i16x32 = simd_shuffle!(
        a,
        a,
        [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        ],
    );
    transmute(ret)
}
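
// Broadcast sketch (illustrative; the helper name is hypothetical): only
// lane 0 of `a` matters; it is replicated into all 32 result lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _broadcastw_epi16_sketch() -> __m512i {
    let a = _mm_setr_epi16(42, 1, 2, 3, 4, 5, 6, 7);
    _mm512_broadcastw_epi16(a) // every lane == 42
}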

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastw_epi16&expand=588)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastw_epi16&expand=589)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastw_epi16&expand=585)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastw_epi16&expand=586)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastw_epi16&expand=582)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastw_epi16&expand=583)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastb_epi8&expand=536)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    let a = _mm512_castsi128_si512(a).as_i8x64();
    let ret: i8x64 = simd_shuffle!(
        a,
        a,
        [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        ],
    );
    transmute(ret)
}
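
// Masked broadcast sketch (illustrative; the helper name is hypothetical):
// the zeromask form below keeps the broadcast only in even byte lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _maskz_broadcastb_epi8_sketch() -> __m512i {
    let a = _mm_set1_epi8(-5);
    _mm512_maskz_broadcastb_epi8(0x5555_5555_5555_5555, a) // -5, 0, -5, 0, ...
}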

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastb_epi8&expand=537)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastb_epi8&expand=538)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastb_epi8&expand=534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastb_epi8&expand=535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastb_epi8&expand=531)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastb_epi8&expand=532)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    #[rustfmt::skip]
    let r: i16x32 = simd_shuffle!(
        a,
        b,
        [
            4, 32 + 4, 5, 32 + 5,
            6, 32 + 6, 7, 32 + 7,
            12, 32 + 12, 13, 32 + 13,
            14, 32 + 14, 15, 32 + 15,
            20, 32 + 20, 21, 32 + 21,
            22, 32 + 22, 23, 32 + 23,
            28, 32 + 28, 29, 32 + 29,
            30, 32 + 30, 31, 32 + 31,
        ],
    );
    transmute(r)
}
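
// Interleave sketch (illustrative; the helper name is hypothetical): each
// 128-bit lane interleaves its own high four words, so with a full mask the
// 256-bit masked form yields 4,104,...,7,107 in lane 0 and 12,112,...,15,115
// in lane 1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _unpackhi_epi16_sketch() -> __m256i {
    let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm256_setr_epi16(
        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
    );
    _mm256_mask_unpackhi_epi16(a, 0xffff, a, b)
}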

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi16&expand=6010)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm512_mask_unpackhi_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi16&expand=6011)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi16&expand=6007)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm256_mask_unpackhi_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi16&expand=6008)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi16&expand=6004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm_mask_unpackhi_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi16&expand=6005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi8&expand=6039)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    #[rustfmt::skip]
    let r: i8x64 = simd_shuffle!(
        a,
        b,
        [
            8,  64+8,   9, 64+9,
            10, 64+10, 11, 64+11,
            12, 64+12, 13, 64+13,
            14, 64+14, 15, 64+15,
            24, 64+24, 25, 64+25,
            26, 64+26, 27, 64+27,
            28, 64+28, 29, 64+29,
            30, 64+30, 31, 64+31,
            40, 64+40, 41, 64+41,
            42, 64+42, 43, 64+43,
            44, 64+44, 45, 64+45,
            46, 64+46, 47, 64+47,
            56, 64+56, 57, 64+57,
            58, 64+58, 59, 64+59,
            60, 64+60, 61, 64+61,
            62, 64+62, 63, 64+63,
        ],
    );
    transmute(r)
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi8&expand=6037)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm512_mask_unpackhi_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi8&expand=6038)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi8&expand=6034)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm256_mask_unpackhi_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi8&expand=6035)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi8&expand=6031)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm_mask_unpackhi_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi8&expand=6032)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    #[rustfmt::skip]
    let r: i16x32 = simd_shuffle!(
        a,
        b,
        [
            0,  32+0,   1, 32+1,
            2,  32+2,   3, 32+3,
            8,  32+8,   9, 32+9,
            10, 32+10, 11, 32+11,
            16, 32+16, 17, 32+17,
            18, 32+18, 19, 32+19,
            24, 32+24, 25, 32+25,
            26, 32+26, 27, 32+27
        ],
    );
    transmute(r)
}
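
// Low-half counterpart sketch (illustrative; the helper name is
// hypothetical): the same interleave pattern applied to the low four words
// of each 128-bit lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _unpacklo_epi16_sketch() -> __m128i {
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_setr_epi16(100, 101, 102, 103, 104, 105, 106, 107);
    _mm_mask_unpacklo_epi16(a, 0b1111_1111, a, b) // 0, 100, 1, 101, 2, 102, 3, 103
}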

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi16&expand=6067)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm512_mask_unpacklo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi16&expand=6068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi16&expand=6064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm256_mask_unpacklo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi16&expand=6065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi16&expand=6061)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm_mask_unpacklo_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi16&expand=6062)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
}
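
// A minimal sketch of the two masking flavours used throughout this file
// (test-only, never called; the helper name is made up; assumes AVX-512BW+VL
// at run time): for a cleared mask bit, the `mask_` form falls back to `src`
// while the `maskz_` form falls back to zero.
#[cfg(test)]
#[allow(dead_code)]
fn sketch_mask_vs_maskz_unpacklo() {
    unsafe {
        let src = _mm_set1_epi16(-1);
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(3);
        // k = 0: no result element survives the mask.
        let kept = _mm_mask_unpacklo_epi16(src, 0, a, b); // == src
        let zeroed = _mm_maskz_unpacklo_epi16(0, a, b); // == all zeros
        assert_eq!(_mm_cmpeq_epi16_mask(kept, src), 0xff);
        assert_eq!(_mm_cmpeq_epi16_mask(zeroed, _mm_setzero_si128()), 0xff);
    }
}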

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    #[rustfmt::skip]
    let r: i8x64 = simd_shuffle!(
        a,
        b,
        [
            0,  64+0,   1, 64+1,
            2,  64+2,   3, 64+3,
            4,  64+4,   5, 64+5,
            6,  64+6,   7, 64+7,
            16, 64+16, 17, 64+17,
            18, 64+18, 19, 64+19,
            20, 64+20, 21, 64+21,
            22, 64+22, 23, 64+23,
            32, 64+32, 33, 64+33,
            34, 64+34, 35, 64+35,
            36, 64+36, 37, 64+37,
            38, 64+38, 39, 64+39,
            48, 64+48, 49, 64+49,
            50, 64+50, 51, 64+51,
            52, 64+52, 53, 64+53,
            54, 64+54, 55, 64+55,
        ],
    );
    transmute(r)
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi8&expand=6094)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm512_mask_unpacklo_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi8&expand=6095)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi8&expand=6091)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm256_mask_unpacklo_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi8&expand=6092)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi8&expand=6088)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm_mask_unpacklo_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi8&expand=6089)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi16&expand=3795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    let mov = a.as_i16x32();
    transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi16&expand=3796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
    let mov = a.as_i16x32();
    transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi16&expand=3793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    let mov = a.as_i16x16();
    transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi16&expand=3794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
    let mov = a.as_i16x16();
    transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi16&expand=3791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i16x8();
    transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi16&expand=3792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i16x8();
    transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
}
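
// `mask_mov` involves no arithmetic at all; it is a pure mask-driven blend.
// A small sketch (test-only, hypothetical name, assumes AVX-512BW+VL at run
// time): bit i of k selects element i of `a` over `src`.
#[cfg(test)]
#[allow(dead_code)]
fn sketch_mask_mov_epi16() {
    unsafe {
        let src = _mm_set1_epi16(7);
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        // Only bit 0 is set, so only the lowest element is taken from a.
        let r = _mm_mask_mov_epi16(src, 0b0000_0001, a);
        let e = _mm_set_epi16(7, 7, 7, 7, 7, 7, 7, 8);
        assert_eq!(_mm_cmpeq_epi16_mask(r, e), 0xff);
    }
}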

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi8&expand=3813)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    let mov = a.as_i8x64();
    transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi8&expand=3814)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
    let mov = a.as_i8x64();
    transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi8&expand=3811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    let mov = a.as_i8x32();
    transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi8&expand=3812)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
    let mov = a.as_i8x32();
    transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi8&expand=3809)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let mov = a.as_i8x16();
    transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi8&expand=3810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let mov = a.as_i8x16();
    transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi16&expand=4942)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
    let r = _mm512_set1_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, r, src.as_i16x32()))
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi16&expand=4943)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
    let r = _mm512_set1_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, r, i16x32::ZERO))
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi16&expand=4939)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
    let r = _mm256_set1_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, r, src.as_i16x16()))
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi16&expand=4940)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
    let r = _mm256_set1_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, r, i16x16::ZERO))
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi16&expand=4936)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
    let r = _mm_set1_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, r, src.as_i16x8()))
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi16&expand=4937)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
    let r = _mm_set1_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, r, i16x8::ZERO))
}
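
// Masked `set1` is a broadcast of the scalar followed by the usual writemask
// blend, exactly as the bodies above spell out. A sketch (test-only,
// hypothetical name, assumes AVX-512BW+VL at run time):
#[cfg(test)]
#[allow(dead_code)]
fn sketch_mask_set1_epi16() {
    unsafe {
        let src = _mm_setzero_si128();
        // Broadcast 5 into the odd elements; even elements keep src's zeros.
        let r = _mm_mask_set1_epi16(src, 0b1010_1010, 5);
        let e = _mm_set_epi16(5, 0, 5, 0, 5, 0, 5, 0);
        assert_eq!(_mm_cmpeq_epi16_mask(r, e), 0xff);
    }
}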

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi8&expand=4970)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
    let r = _mm512_set1_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, r, src.as_i8x64()))
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi8&expand=4971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
    let r = _mm512_set1_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, r, i8x64::ZERO))
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi8&expand=4967)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
    let r = _mm256_set1_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, r, src.as_i8x32()))
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi8&expand=4968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
    let r = _mm256_set1_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, r, i8x32::ZERO))
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi8&expand=4964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
    let r = _mm_set1_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, r, src.as_i8x16()))
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi8&expand=4965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
    let r = _mm_set1_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, r, i8x16::ZERO))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_i16x32();
    let r: i16x32 = simd_shuffle!(
        a,
        a,
        [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            4,
            5,
            6,
            7,
            (IMM8 as u32 & 0b11) + 8,
            ((IMM8 as u32 >> 2) & 0b11) + 8,
            ((IMM8 as u32 >> 4) & 0b11) + 8,
            ((IMM8 as u32 >> 6) & 0b11) + 8,
            12,
            13,
            14,
            15,
            (IMM8 as u32 & 0b11) + 16,
            ((IMM8 as u32 >> 2) & 0b11) + 16,
            ((IMM8 as u32 >> 4) & 0b11) + 16,
            ((IMM8 as u32 >> 6) & 0b11) + 16,
            20,
            21,
            22,
            23,
            (IMM8 as u32 & 0b11) + 24,
            ((IMM8 as u32 >> 2) & 0b11) + 24,
            ((IMM8 as u32 >> 4) & 0b11) + 24,
            ((IMM8 as u32 >> 6) & 0b11) + 24,
            28,
            29,
            30,
            31,
        ],
    );
    transmute(r)
}
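
// Worked IMM8 example (test-only sketch, shown on the 128-bit variant for
// brevity; assumes SSE2 plus AVX-512BW+VL for the comparison): each 2-bit
// field of IMM8 selects one of the four low words, lowest field first, so
// IMM8 = 0b00_01_10_11 reverses the low four words of every 128-bit lane
// while the high four words pass through. Thanks to
// `rustc_legacy_const_generics`, the immediate can be written either as a
// turbofish const argument, as here, or C-style as a trailing call argument.
#[cfg(test)]
#[allow(dead_code)]
fn sketch_shufflelo_imm8() {
    unsafe {
        let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); // word i holds i
        let r = _mm_shufflelo_epi16::<0b00_01_10_11>(a);
        let e = _mm_set_epi16(7, 6, 5, 4, 0, 1, 2, 3);
        assert_eq!(_mm_cmpeq_epi16_mask(r, e), 0xff);
    }
}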

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflelo_epi16&expand=5219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm512_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm512_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_i16x32();
    let r: i16x32 = simd_shuffle!(
        a,
        a,
        [
            0,
            1,
            2,
            3,
            (IMM8 as u32 & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
            8,
            9,
            10,
            11,
            (IMM8 as u32 & 0b11) + 12,
            ((IMM8 as u32 >> 2) & 0b11) + 12,
            ((IMM8 as u32 >> 4) & 0b11) + 12,
            ((IMM8 as u32 >> 6) & 0b11) + 12,
            16,
            17,
            18,
            19,
            (IMM8 as u32 & 0b11) + 20,
            ((IMM8 as u32 >> 2) & 0b11) + 20,
            ((IMM8 as u32 >> 4) & 0b11) + 20,
            ((IMM8 as u32 >> 6) & 0b11) + 20,
            24,
            25,
            26,
            27,
            (IMM8 as u32 & 0b11) + 28,
            ((IMM8 as u32 >> 2) & 0b11) + 28,
            ((IMM8 as u32 >> 4) & 0b11) + 28,
            ((IMM8 as u32 >> 6) & 0b11) + 28,
        ],
    );
    transmute(r)
}
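
// `shufflehi` mirrors `shufflelo` (test-only sketch on the 128-bit variant,
// assuming AVX-512BW+VL at run time): the same 2-bit fields index the high
// four words of each lane, offset by 4, while the low four words are copied
// through unchanged.
#[cfg(test)]
#[allow(dead_code)]
fn sketch_shufflehi_imm8() {
    unsafe {
        let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        // Every field selects index 0, i.e. word 4: broadcast it across the
        // high half.
        let r = _mm_shufflehi_epi16::<0b00_00_00_00>(a);
        let e = _mm_set_epi16(4, 4, 4, 4, 3, 2, 1, 0);
        assert_eq!(_mm_cmpeq_epi16_mask(r, e), 0xff);
    }
}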

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm512_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm512_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO))
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi8&expand=5159)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpshufb(a.as_i8x64(), b.as_i8x64()))
}
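
// `vpshufb` control-byte semantics (test-only sketch on the 128-bit
// operation; the 512-bit form applies the same rule independently in each
// 128-bit lane): the low four bits of a control byte index a byte of the
// same lane of `a`, and a control byte with its high bit set forces the
// result byte to zero. Assumes SSSE3 plus AVX-512BW+VL for the comparison.
#[cfg(test)]
#[allow(dead_code)]
fn sketch_shuffle_epi8_control() {
    unsafe {
        let a = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // Broadcast byte 3 everywhere, except result byte 0, which is zeroed
        // by the sign bit of its control byte.
        let ctrl = _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -128);
        let r = _mm_shuffle_epi8(a, ctrl);
        let e = _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0);
        assert_eq!(_mm_cmpeq_epi8_mask(r, e), 0xffff);
    }
}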

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi8&expand=5157)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm512_mask_shuffle_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi8&expand=5158)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO))
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi8&expand=5154)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm256_mask_shuffle_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi8&expand=5155)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO))
}

/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi8&expand=5151)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi8&expand=5152)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO))
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi16_mask&expand=5884)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi16_mask(and, zero)
}
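
// `test` sketch (test-only, hypothetical name, assumes AVX-512BW+VL at run
// time): bit i of the result is set iff `a[i] & b[i] != 0`, which makes it a
// cheap per-element "any common bits?" probe.
#[cfg(test)]
#[allow(dead_code)]
fn sketch_test_epi16_mask() {
    unsafe {
        let a = _mm_set_epi16(0, 0, 0, 0, 1, 2, 4, 8);
        let b = _mm_set_epi16(1, 2, 4, 8, 1, 2, 4, 8);
        // Only the low four elements share set bits with b.
        assert_eq!(_mm_test_epi16_mask(a, b), 0b0000_1111);
    }
}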

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi16_mask&expand=5883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi16_mask&expand=5881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi16_mask&expand=5879)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi8_mask&expand=5902)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi8_mask&expand=5901)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi8_mask&expand=5900)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi8_mask&expand=5899)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi8_mask&expand=5898)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi8_mask&expand=5897)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi16_mask&expand=5915)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi16_mask(and, zero)
}
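
// `testn` is the element-wise complement of `test` above: bit i is set iff
// `a[i] & b[i] == 0`, so the two masks are always bitwise inverses. A sketch
// (test-only, hypothetical name, assumes AVX-512BW at run time):
#[cfg(test)]
#[allow(dead_code)]
fn sketch_testn_is_not_test() {
    unsafe {
        let a = _mm512_set1_epi16(0b0101);
        let b = _mm512_set1_epi16(0b1010);
        assert_eq!(_mm512_testn_epi16_mask(a, b), !_mm512_test_epi16_mask(a, b));
        // The bit patterns are disjoint, so every AND is zero and testn sets
        // every mask bit.
        assert_eq!(_mm512_testn_epi16_mask(a, b), !0);
    }
}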

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi16_mask&expand=5914)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi16_mask&expand=5913)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi16_mask&expand=5912)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi16_mask&expand=5911)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi16_mask&expand=5910)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
9209    _mm_mask_cmpeq_epi16_mask(k, and, zero)
9210}
9211
9212/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9213///
9214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi8_mask&expand=5933)
9215#[inline]
9216#[target_feature(enable = "avx512bw")]
9217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9218#[cfg_attr(test, assert_instr(vptestnmb))]
9219pub unsafe fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
9220    let and = _mm512_and_si512(a, b);
9221    let zero = _mm512_setzero_si512();
9222    _mm512_cmpeq_epi8_mask(and, zero)
9223}
9224
9225/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9226///
9227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi8_mask&expand=5932)
9228#[inline]
9229#[target_feature(enable = "avx512bw")]
9230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9231#[cfg_attr(test, assert_instr(vptestnmb))]
9232pub unsafe fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
9233    let and = _mm512_and_si512(a, b);
9234    let zero = _mm512_setzero_si512();
9235    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
9236}
9237
9238/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9239///
9240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi8_mask&expand=5931)
9241#[inline]
9242#[target_feature(enable = "avx512bw,avx512vl")]
9243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9244#[cfg_attr(test, assert_instr(vptestnmb))]
9245pub unsafe fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
9246    let and = _mm256_and_si256(a, b);
9247    let zero = _mm256_setzero_si256();
9248    _mm256_cmpeq_epi8_mask(and, zero)
9249}
9250
9251/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9252///
9253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi8_mask&expand=5930)
9254#[inline]
9255#[target_feature(enable = "avx512bw,avx512vl")]
9256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9257#[cfg_attr(test, assert_instr(vptestnmb))]
9258pub unsafe fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
9259    let and = _mm256_and_si256(a, b);
9260    let zero = _mm256_setzero_si256();
9261    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
9262}
9263
9264/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
9265///
9266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi8_mask&expand=5929)
9267#[inline]
9268#[target_feature(enable = "avx512bw,avx512vl")]
9269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9270#[cfg_attr(test, assert_instr(vptestnmb))]
9271pub unsafe fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
9272    let and = _mm_and_si128(a, b);
9273    let zero = _mm_setzero_si128();
9274    _mm_cmpeq_epi8_mask(and, zero)
9275}
9276
9277/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
9278///
9279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi8_mask&expand=5928)
9280#[inline]
9281#[target_feature(enable = "avx512bw,avx512vl")]
9282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9283#[cfg_attr(test, assert_instr(vptestnmb))]
9284pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
9285    let and = _mm_and_si128(a, b);
9286    let zero = _mm_setzero_si128();
9287    _mm_mask_cmpeq_epi8_mask(k, and, zero)
9288}
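
// Illustrative sketch, not part of the original source: on the same inputs the
// `testn` mask is the lanewise complement of the `test` mask, because each
// 8-bit AND result is either zero or non-zero. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _example_test_testn_complement(a: __m128i, b: __m128i) -> bool {
    // Exactly one of the two masks flags each of the 16 lanes.
    (_mm_test_epi8_mask(a, b) ^ _mm_testn_epi8_mask(a, b)) == 0xffff
}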

/// Store 64-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask64&expand=5578)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
    ptr::write(mem_addr, a);
}

/// Store 32-bit mask from a into memory.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask32&expand=5577)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
    ptr::write(mem_addr, a);
}

/// Load 64-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask64&expand=3318)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
    ptr::read(mem_addr)
}

/// Load 32-bit mask from memory into k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask32&expand=3317)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
    ptr::read(mem_addr)
}
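
// Illustrative sketch, not part of the original source: the mask load/store
// helpers are plain scalar reads and writes, so a store followed by a load
// round-trips the mask unchanged. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _example_mask64_roundtrip(k: __mmask64) -> bool {
    let mut slot: __mmask64 = 0;
    _store_mask64(&mut slot, k);
    _load_mask64(&slot) == k
}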

/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sad_epu8&expand=4855)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsadbw(a.as_u8x64(), b.as_u8x64()))
}
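
// Scalar model, not part of the original source: what `_mm512_sad_epu8`
// computes for one 64-bit lane. The eight byte-wise absolute differences
// collapse into a single 16-bit sum held in the low bits of the lane; the
// helper name is hypothetical and exists only to document the arithmetic.
#[cfg(test)]
#[allow(dead_code)]
fn _example_sad_u8_lane(a: [u8; 8], b: [u8; 8]) -> u64 {
    a.iter()
        .zip(b.iter())
        .map(|(&x, &y)| u64::from(x.abs_diff(y)))
        .sum()
}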

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_dbsad_epu8&expand=2114)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vdbpsadbw(a, b, IMM8);
    transmute(r)
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_dbsad_epu8&expand=2115)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vdbpsadbw(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_u16x32()))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_dbsad_epu8&expand=2116)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vdbpsadbw(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, u16x32::ZERO))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dbsad_epu8&expand=2111)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vdbpsadbw256(a, b, IMM8);
    transmute(r)
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_dbsad_epu8&expand=2112)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vdbpsadbw256(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_u16x16()))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_dbsad_epu8&expand=2113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vdbpsadbw256(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, u16x16::ZERO))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dbsad_epu8&expand=2108)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vdbpsadbw128(a, b, IMM8);
    transmute(r)
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vdbpsadbw128(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_u16x8()))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vdbpsadbw128(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, u16x8::ZERO))
}
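
// Scalar model, not part of the original source: the quadruplet selection and
// the four SADs per 64-bit chunk that `vdbpsadbw` performs on one 128-bit
// lane, transcribed from Intel's published pseudocode. The helper name is
// hypothetical and exists only to document the indexing.
#[cfg(test)]
#[allow(dead_code)]
fn _example_dbsad_lane(a: [u8; 16], b: [u8; 16], imm8: u8) -> [u16; 8] {
    // Gather the four 32-bit quadruplets of `b` selected by the imm8 control.
    let mut tmp = [0u8; 16];
    for q in 0..4 {
        let sel = usize::from((imm8 >> (2 * q)) & 0b11);
        tmp[4 * q..4 * q + 4].copy_from_slice(&b[4 * sel..4 * sel + 4]);
    }
    let sad = |x: &[u8], y: &[u8]| -> u16 {
        x.iter().zip(y).map(|(&p, &q)| u16::from(p.abs_diff(q))).sum()
    };
    let mut dst = [0u16; 8];
    for j in 0..2 {
        let i = 8 * j;
        // The first two SADs read the lower quadruplet of this chunk of `a`,
        // the last two the upper one; `tmp` is consumed at 8-bit offsets.
        dst[4 * j] = sad(&a[i..i + 4], &tmp[i..i + 4]);
        dst[4 * j + 1] = sad(&a[i..i + 4], &tmp[i + 1..i + 5]);
        dst[4 * j + 2] = sad(&a[i + 4..i + 8], &tmp[i + 2..i + 6]);
        dst[4 * j + 3] = sad(&a[i + 4..i + 8], &tmp[i + 3..i + 7]);
    }
    dst
}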

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi16_mask&expand=3873)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
                                           // using vpmovb2m plus converting the mask register to a standard register.
pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
                                           // using vpmovb2m plus converting the mask register to a standard register.
pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}
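
// Illustrative sketch, not part of the original source: for 128-bit vectors
// the result agrees with the long-standing SSE2 `_mm_movemask_epi8`, only
// delivered as a mask type. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _example_movepi8_mask_matches_movemask(a: __m128i) -> bool {
    _mm_movepi8_mask(a) == _mm_movemask_epi8(a) as __mmask16
}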

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi16&expand=3886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
    let one = _mm512_set1_epi16(-1).as_i16x32(); // -1 sets every bit of each lane
    transmute(simd_select_bitmask(k, one, i16x32::ZERO))
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi16&expand=3885)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
    let one = _mm256_set1_epi16(-1).as_i16x16(); // -1 sets every bit of each lane
    transmute(simd_select_bitmask(k, one, i16x16::ZERO))
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi16&expand=3884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i {
    let one = _mm_set1_epi16(-1).as_i16x8(); // -1 sets every bit of each lane
    transmute(simd_select_bitmask(k, one, i16x8::ZERO))
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi8&expand=3895)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
    let one = _mm512_set1_epi8(-1).as_i8x64(); // -1 sets every bit of each lane
    transmute(simd_select_bitmask(k, one, i8x64::ZERO))
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi8&expand=3894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
    let one = _mm256_set1_epi8(-1).as_i8x32(); // -1 sets every bit of each lane
    transmute(simd_select_bitmask(k, one, i8x32::ZERO))
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi8&expand=3893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
    let one = _mm_set1_epi8(-1).as_i8x16(); // -1 sets every bit of each lane
    transmute(simd_select_bitmask(k, one, i8x16::ZERO))
}
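
// Illustrative sketch, not part of the original source: `movm` expands each
// mask bit to a lane of all ones, whose sign bit `movepi8_mask` then recovers,
// so the two conversions round-trip. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _example_movm_roundtrip(k: __mmask16) -> bool {
    _mm_movepi8_mask(_mm_movm_epi8(k)) == k
}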

/// Convert 32-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#_cvtmask32_u32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _cvtmask32_u32(a: __mmask32) -> u32 {
    a
}

/// Convert integer value a into a 32-bit mask, and store the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _cvtu32_mask32(a: u32) -> __mmask32 {
    a
}

/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask32&expand=3207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    // The kaddd instruction wraps on overflow, so use a wrapping add rather
    // than `+`, which would panic on overflow in debug builds.
    a.wrapping_add(b)
}

/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    // The kaddq instruction wraps on overflow, so use a wrapping add rather
    // than `+`, which would panic on overflow in debug builds.
    a.wrapping_add(b)
}

/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a & b
}

/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a & b
}
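
// Illustrative sketch, not part of the original source: mask-register ops let
// several vector predicates combine without leaving the mask domain. Here a
// lanewise unsigned range test, `lo <= a <= hi`. The helper name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _example_in_range_mask(a: __m512i, lo: __m512i, hi: __m512i) -> __mmask64 {
    let ge = _mm512_cmpge_epu8_mask(a, lo);
    let le = _mm512_cmple_epu8_mask(a, hi);
    _kand_mask64(ge, le)
}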

/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
    !a
}

/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
    !a
}

/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask32&expand=3219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a) & b
}

/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask64&expand=3220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a) & b
}

/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a | b
}

/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a | b
}

/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    a ^ b
}

/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    a ^ b
}

/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask32&expand=3286)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    _knot_mask32(a ^ b)
}

/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask64&expand=3287)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    _knot_mask64(a ^ b)
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask32(a, b);
    *all_ones = (tmp == 0xffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask64(a, b);
    *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0xffffffff) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
}

/// Compute the bitwise OR of 32-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kor_mask32(a, b) == 0) as u8
}

/// Compute the bitwise OR of 64-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kor_mask64(a, b) == 0) as u8
}
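
// Illustrative sketch, not part of the original source: `_kortestz` gives a
// cheap "did anything match in either mask" answer, e.g. when scanning two
// vectors for a needle byte. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _example_any_match(a: __m512i, b: __m512i, needle: __m512i) -> bool {
    let m0 = _mm512_cmpeq_epi8_mask(a, needle);
    let m1 = _mm512_cmpeq_epi8_mask(b, needle);
    _kortestz_mask64_u8(m0, m1) == 0
}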

/// Shift the bits of 32-bit mask a left by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    a << COUNT
}

/// Shift the bits of 64-bit mask a left by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    a << COUNT
}

/// Shift the bits of 32-bit mask a right by count while shifting in zeros, and store the least significant 32 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
    a >> COUNT
}

/// Shift the bits of 64-bit mask a right by count while shifting in zeros, and store the least significant 64 bits of the result in k.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask64)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
    a >> COUNT
}
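
// Illustrative sketch, not part of the original source: the const-generic
// count mirrors the immediate operand of `kshiftr`, so shifting right by 8
// discards the mask's low 8 lanes. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _example_drop_low_eight_lanes(m: __mmask32) -> __mmask32 {
    _kshiftri_mask32::<8>(m)
}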

/// Compute the bitwise AND of 32-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask32(a, b) == 0) as u8;
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b, and if the result is all zeros, store 1 in dst,
/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b; if the result is all
/// zeros, store 1 in and_not, otherwise store 0 in and_not.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
    *and_not = (_kandn_mask64(a, b) == 0) as u8;
    (_kand_mask64(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 32-bit mask a and then AND with 32-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kandn_mask32(a, b) == 0) as u8
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with 64-bit mask b; if the result is all
/// zeros, store 1 in dst, otherwise store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kandn_mask64(a, b) == 0) as u8
}

/// Compute the bitwise AND of 32-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask32_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
    (_kand_mask32(a, b) == 0) as u8
}

/// Compute the bitwise AND of 64-bit masks a and b; if the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask64_u8)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
    (_kand_mask64(a, b) == 0) as u8
}
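
// Illustrative sketch, not part of the original source: `_ktestc_mask32_u8`
// returns 1 exactly when `!a & b == 0`, i.e. when every bit set in `b` is also
// set in `a`, so it doubles as a subset test. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn _example_b_is_subset_of_a(a: __mmask32, b: __mmask32) -> bool {
    _ktestc_mask32_u8(a, b) != 0
}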

/// Unpack and interleave 16 bits from masks a and b, and store the 32-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackw)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generates plain and/shift/or code instead of kunpckwd
pub unsafe fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
    ((a & 0xffff) << 16) | (b & 0xffff)
}

/// Unpack and interleave 32 bits from masks a and b, and store the 64-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackd)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generates plain and/shift/or code instead of kunpckdq
pub unsafe fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
    ((a & 0xffffffff) << 32) | (b & 0xffffffff)
}
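
// Illustrative sketch, not part of the original source: despite the "unpack
// and interleave" wording, `kunpackw` concatenates the low halves of its
// operands, which is handy for fusing two 16-lane compare masks into one
// 32-bit mask. The helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _example_fuse_compare_masks(lo: __m256i, hi: __m256i, needle: __m256i) -> __mmask32 {
    let m_lo = _mm256_cmpeq_epi16_mask(lo, needle);
    let m_hi = _mm256_cmpeq_epi16_mask(hi, needle);
    // `a`'s low 16 bits land in the high half of the result, `b`'s in the low half.
    _mm512_kunpackw(m_hi as __mmask32, m_lo as __mmask32)
}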

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi8&expand=1407)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
    let a = a.as_i16x32();
    transmute::<i8x32, _>(simd_cast(a))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi8&expand=1408)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi8&expand=1404)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
    let a = a.as_i16x16();
    transmute::<i8x16, _>(simd_cast(a))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let v256: i16x16 = simd_shuffle!(
        a,
        i16x8::ZERO,
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]
    );
    transmute::<i8x16, _>(simd_cast(v256))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi8&expand=1402)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi16_epi8(a).as_i8x16();
    // Zero-extend the mask so the upper eight lanes take the fallback operand.
    let k = k as __mmask16;
    transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi16_epi8(a).as_i8x16();
    // Zero-extend the mask so the upper eight lanes take the fallback operand.
    let k = k as __mmask16;
    transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
}
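
// Illustrative sketch, not part of the original source: the plain conversion
// keeps only the low byte of each 16-bit lane, while the saturating family
// below clamps instead, so 0x1234 truncates to 0x34 but saturates to 0x7f
// (i8::MAX). Assumes the `_mm_cvtsepi16_epi8` variant defined later in this
// module; the helper name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn _example_truncate_vs_saturate() -> (__m128i, __m128i) {
    let a = _mm_set1_epi16(0x1234);
    (_mm_cvtepi16_epi8(a), _mm_cvtsepi16_epi8(a))
}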
10207
10208/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
10209///
10210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi16_epi8&expand=1807)
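///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// // 300 exceeds i8::MAX, so it saturates to 127 instead of wrapping.
/// let a = _mm512_set1_epi16(300);
/// let r = _mm512_cvtsepi16_epi8(a); // every byte of r is 127
/// ```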
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    transmute(vpmovswb(
        a.as_i16x32(),
        i8x32::ZERO,
        0b11111111_11111111_11111111_11111111,
    ))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi16_epi8&expand=2042)
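///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// // 300 exceeds u8::MAX, so it saturates to 255.
/// let a = _mm512_set1_epi16(300);
/// let r = _mm512_cvtusepi16_epi8(a); // every byte of r is 0xFF
/// ```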
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    transmute(vpmovuswb(
        a.as_u16x32(),
        u8x32::ZERO,
        0b11111111_11111111_11111111_11111111,
    ))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    transmute(vpmovuswb256(
        a.as_u16x16(),
        u8x16::ZERO,
        0b11111111_11111111,
    ))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi16&expand=1526)
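///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// // Sign extension preserves the value, so -5i8 widens to -5i16.
/// let a = _mm256_set1_epi8(-5);
/// let r = _mm512_cvtepi8_epi16(a); // every 16-bit lane of r is -5
/// ```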
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    let a = a.as_i8x32();
    transmute::<i16x32, _>(simd_cast(a))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi16&expand=1527)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi16&expand=1524)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi16&expand=1521)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi16&expand=1522)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi16&expand=1612)
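///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// // Zero extension treats the byte as unsigned: 0xFF widens to 255, not -1.
/// let a = _mm256_set1_epi8(-1);
/// let r = _mm512_cvtepu8_epi16(a); // every 16-bit lane of r is 255
/// ```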
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    let a = a.as_u8x32();
    transmute::<i16x32, _>(simd_cast(a))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi16&expand=1613)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi16&expand=1610)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi16&expand=1607)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepu8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi16&expand=1608)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepu8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
}

/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bslli_epi128&expand=591)
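///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// // Each of the four 128-bit lanes shifts independently: byte 0 of every
/// // lane becomes zero and the top byte of every lane is discarded.
/// let a = _mm512_set1_epi8(7);
/// let r = _mm512_bslli_epi128::<1>(a);
/// ```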
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            64 + (i - shift)
        }
    }
    let a = a.as_i8x64();
    let zero = i8x64::ZERO;
    let r: i8x64 = simd_shuffle!(
        zero,
        a,
        [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
            mask(IMM8, 16),
            mask(IMM8, 17),
            mask(IMM8, 18),
            mask(IMM8, 19),
            mask(IMM8, 20),
            mask(IMM8, 21),
            mask(IMM8, 22),
            mask(IMM8, 23),
            mask(IMM8, 24),
            mask(IMM8, 25),
            mask(IMM8, 26),
            mask(IMM8, 27),
            mask(IMM8, 28),
            mask(IMM8, 29),
            mask(IMM8, 30),
            mask(IMM8, 31),
            mask(IMM8, 32),
            mask(IMM8, 33),
            mask(IMM8, 34),
            mask(IMM8, 35),
            mask(IMM8, 36),
            mask(IMM8, 37),
            mask(IMM8, 38),
            mask(IMM8, 39),
            mask(IMM8, 40),
            mask(IMM8, 41),
            mask(IMM8, 42),
            mask(IMM8, 43),
            mask(IMM8, 44),
            mask(IMM8, 45),
            mask(IMM8, 46),
            mask(IMM8, 47),
            mask(IMM8, 48),
            mask(IMM8, 49),
            mask(IMM8, 50),
            mask(IMM8, 51),
            mask(IMM8, 52),
            mask(IMM8, 53),
            mask(IMM8, 54),
            mask(IMM8, 55),
            mask(IMM8, 56),
            mask(IMM8, 57),
            mask(IMM8, 58),
            mask(IMM8, 59),
            mask(IMM8, 60),
            mask(IMM8, 61),
            mask(IMM8, 62),
            mask(IMM8, 63),
        ],
    );
    transmute(r)
}

/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bsrli_epi128&expand=594)
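///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// // The mirror image of `_mm512_bslli_epi128`: the top byte of every
/// // 128-bit lane becomes zero and byte 0 of every lane is discarded.
/// let a = _mm512_set1_epi8(7);
/// let r = _mm512_bsrli_epi128::<1>(a);
/// ```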
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_i8x64();
    let zero = i8x64::ZERO;
    // Shifts of 16 or more bytes move in only zeros; clamp the shift so those
    // values reach the `_ => zero` arm instead of wrapping modulo 16.
    let r: i8x64 = match IMM8.min(16) {
        0 => simd_shuffle!(
            a,
            zero,
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
            ],
        ),
        1 => simd_shuffle!(
            a,
            zero,
            [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
                45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
            ],
        ),
        2 => simd_shuffle!(
            a,
            zero,
            [
                2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
            ],
        ),
        3 => simd_shuffle!(
            a,
            zero,
            [
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
                114,
            ],
        ),
        4 => simd_shuffle!(
            a,
            zero,
            [
                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25,
                26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
                47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115,
            ],
        ),
        5 => simd_shuffle!(
            a,
            zero,
            [
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115, 116,
            ],
        ),
        6 => simd_shuffle!(
            a,
            zero,
            [
                6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117,
            ],
        ),
        7 => simd_shuffle!(
            a,
            zero,
            [
                7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118,
            ],
        ),
        8 => simd_shuffle!(
            a,
            zero,
            [
                8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28,
                29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97,
                98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118, 119,
            ],
        ),
        9 => simd_shuffle!(
            a,
            zero,
            [
                9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29,
                30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98,
                99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120,
            ],
        ),
        10 => simd_shuffle!(
            a,
            zero,
            [
                10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30,
                31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121,
            ],
        ),
        11 => simd_shuffle!(
            a,
            zero,
            [
                11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31,
                80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120, 121, 122,
            ],
        ),
        12 => simd_shuffle!(
            a,
            zero,
            [
                12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100,
                101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121, 122, 123,
            ],
        ),
        13 => simd_shuffle!(
            a,
            zero,
            [
                13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81,
                82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101,
                102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118,
                119, 120, 121, 122, 123, 124,
            ],
        ),
        14 => simd_shuffle!(
            a,
            zero,
            [
                14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82,
                83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102,
                103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119,
                120, 121, 122, 123, 124, 125,
            ],
        ),
        15 => simd_shuffle!(
            a,
            zero,
            [
                15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83,
                84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103,
                104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120,
                121, 122, 123, 124, 125, 126,
            ],
        ),
        _ => zero,
    };
    transmute(r)
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
/// Unlike the [`_mm_alignr_epi8`] and [`_mm256_alignr_epi8`] functions, where the entire input vectors are concatenated to form the temporary result,
/// this concatenation happens in 4 separate steps, each of which builds a 32-byte temporary result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi8&expand=263)
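///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// let a = _mm512_set1_epi8(2);
/// let b = _mm512_set1_epi8(3);
/// // In every 128-bit lane: bytes 0..=14 come from b's lane and byte 15 is
/// // the lowest byte of a's lane, i.e. [3, 3, ..., 3, 2].
/// let r = _mm512_alignr_epi8::<1>(a, b);
/// ```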
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    // Negative IMM8 would otherwise reach `unreachable_unchecked` below; the
    // masked variants already enforce this bound.
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if IMM8 >= 32 {
        return _mm512_setzero_si512();
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 > 16 {
        (_mm512_setzero_si512(), a)
    } else {
        (a, b)
    };
    let a = a.as_i8x64();
    let b = b.as_i8x64();

    if IMM8 == 16 {
        return transmute(a);
    }

    let r: i8x64 = match IMM8 % 16 {
        0 => simd_shuffle!(
            b,
            a,
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
            ],
        ),
        1 => simd_shuffle!(
            b,
            a,
            [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
                45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
            ],
        ),
        2 => simd_shuffle!(
            b,
            a,
            [
                2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
            ],
        ),
        3 => simd_shuffle!(
            b,
            a,
            [
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
                114,
            ],
        ),
        4 => simd_shuffle!(
            b,
            a,
            [
                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25,
                26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
                47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115,
            ],
        ),
        5 => simd_shuffle!(
            b,
            a,
            [
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115, 116,
            ],
        ),
        6 => simd_shuffle!(
            b,
            a,
            [
                6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117,
            ],
        ),
        7 => simd_shuffle!(
            b,
            a,
            [
                7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118,
            ],
        ),
        8 => simd_shuffle!(
            b,
            a,
            [
                8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28,
                29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97,
                98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118, 119,
            ],
        ),
        9 => simd_shuffle!(
            b,
            a,
            [
                9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29,
                30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98,
                99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120,
            ],
        ),
        10 => simd_shuffle!(
            b,
            a,
            [
                10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30,
                31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121,
            ],
        ),
        11 => simd_shuffle!(
            b,
            a,
            [
                11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31,
                80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120, 121, 122,
            ],
        ),
        12 => simd_shuffle!(
            b,
            a,
            [
                12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100,
                101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121, 122, 123,
            ],
        ),
        13 => simd_shuffle!(
            b,
            a,
            [
                13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81,
                82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101,
                102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118,
                119, 120, 121, 122, 123, 124,
            ],
        ),
        14 => simd_shuffle!(
            b,
            a,
            [
                14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82,
                83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102,
                103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119,
                120, 121, 122, 123, 124, 125,
            ],
        ),
        15 => simd_shuffle!(
            b,
            a,
            [
                15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83,
                84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103,
                104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120,
                121, 122, 123, 124, 125, 126,
            ],
        ),
        _ => unreachable_unchecked(),
    };
    transmute(r)
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi8&expand=264)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_alignr_epi8<const IMM8: i32>(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm512_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi8&expand=265)
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm512_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi8&expand=261)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm256_mask_alignr_epi8<const IMM8: i32>(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm256_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi8&expand=262)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm256_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi8&expand=258)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi8&expand=259)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    let r = _mm_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
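///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// let mut out = [0i8; 32];
/// let a = _mm512_set1_epi16(300);
/// // Only bytes whose mask bit is set are written; the rest of `out` is
/// // left untouched. 300 saturates to i8::MAX.
/// _mm512_mask_cvtsepi16_storeu_epi8(out.as_mut_ptr(), 0b1111, a);
/// assert_eq!(&out[..5], &[127, 127, 127, 127, 0]);
/// ```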
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovswbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
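///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// let mut out = [0i8; 32];
/// let a = _mm512_set1_epi16(0x0102);
/// // Every mask bit is set, so all 32 bytes receive the truncated value 0x02.
/// _mm512_mask_cvtepi16_storeu_epi8(out.as_mut_ptr(), !0, a);
/// assert_eq!(out, [0x02; 32]);
/// ```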
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovwbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
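///
/// # Examples
///
/// A behavioral sketch (illustrative only; assumes `avx512bw` support and an
/// `unsafe` context):
///
/// ```ignore
/// let mut out = [0u8; 32];
/// let a = _mm512_set1_epi16(300);
/// // 300 saturates to u8::MAX, so every written byte is 255.
/// _mm512_mask_cvtusepi16_storeu_epi8(out.as_mut_ptr().cast(), !0, a);
/// assert_eq!(out, [255u8; 32]);
/// ```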
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
}

#[allow(improper_ctypes)]
extern "C" {
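    // Bindings to LLVM's x86 AVX-512BW intrinsics; each `link_name` names the
    // LLVM intrinsic that lowers to the instruction used by the wrappers above
    // (e.g. `llvm.x86.avx512.pmul.hr.sw.512` lowers to `vpmulhrsw`).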
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;

    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psllv.w.512"]
    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psllv.w.256"]
    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psllv.w.128"]
    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrav.w.512"]
    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrav.w.256"]
    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrav.w.128"]
    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
11349    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
11350    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
11351    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
11352    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
11353
11354    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
11355    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
11356    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
11357    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
11358    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
11359    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
11360
11361    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
11362    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
11363    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
11364    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
11365    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
11366    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
11367
11368    #[link_name = "llvm.x86.avx512.mask.loadu.b.128"]
11369    fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
11370    #[link_name = "llvm.x86.avx512.mask.loadu.w.128"]
11371    fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
11372    #[link_name = "llvm.x86.avx512.mask.loadu.b.256"]
11373    fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
11374    #[link_name = "llvm.x86.avx512.mask.loadu.w.256"]
11375    fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
11376    #[link_name = "llvm.x86.avx512.mask.loadu.b.512"]
11377    fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
11378    #[link_name = "llvm.x86.avx512.mask.loadu.w.512"]
11379    fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
11380
11381    #[link_name = "llvm.x86.avx512.mask.storeu.b.128"]
11382    fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16);
11383    #[link_name = "llvm.x86.avx512.mask.storeu.w.128"]
11384    fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8);
11385    #[link_name = "llvm.x86.avx512.mask.storeu.b.256"]
11386    fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32);
11387    #[link_name = "llvm.x86.avx512.mask.storeu.w.256"]
11388    fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16);
11389    #[link_name = "llvm.x86.avx512.mask.storeu.b.512"]
11390    fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64);
11391    #[link_name = "llvm.x86.avx512.mask.storeu.w.512"]
11392    fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32);
11393
11394}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem::{self};

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
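
    // Note for reading the masked tests in this module: mask bit i selects
    // lane i, and lane 0 is the *last* argument of the `_mm512_set_epi16`-
    // style constructors (they list lanes from highest to lowest). So a mask
    // such as 0b..._11111111 activates the values written at the right-hand
    // end of each `set` call above and below.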

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_add_epi16(a, b);
        let e = _mm512_set1_epi16(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_add_epi8(a, b);
        let e = _mm512_set1_epi8(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_adds_epu16(a, b);
        let e = _mm512_set1_epi16(u16::MAX as i16);
        assert_eq_m512i(r, e);
    }
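
    // The `_epu16`/`_epu8` tests below pass `u16::MAX as i16` (bit pattern
    // 0xFFFF, i.e. -1 in the signed view) because the `set1` constructors
    // take signed arguments; the saturating add itself treats each lane as
    // unsigned, so 1 + 0xFFFF clamps to 0xFFFF rather than wrapping.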

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_adds_epu8(a, b);
        let e = _mm512_set1_epi8(u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_adds_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_adds_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_sub_epi16(a, b);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_sub_epi8(a, b);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_subs_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }
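
    // As with the saturating adds, the `subs_epu` tests view each lane as
    // unsigned: 1 - 0xFFFF cannot go below zero, so the result floors at 0
    // instead of wrapping around.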
12283
12284    #[simd_test(enable = "avx512bw")]
12285    unsafe fn test_mm512_mask_subs_epu16() {
12286        let a = _mm512_set1_epi16(1);
12287        let b = _mm512_set1_epi16(u16::MAX as i16);
12288        let r = _mm512_mask_subs_epu16(a, 0, a, b);
12289        assert_eq_m512i(r, a);
12290        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
12291        #[rustfmt::skip]
12292        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12293                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12294        assert_eq_m512i(r, e);
12295    }
12296
12297    #[simd_test(enable = "avx512bw")]
12298    unsafe fn test_mm512_maskz_subs_epu16() {
12299        let a = _mm512_set1_epi16(1);
12300        let b = _mm512_set1_epi16(u16::MAX as i16);
12301        let r = _mm512_maskz_subs_epu16(0, a, b);
12302        assert_eq_m512i(r, _mm512_setzero_si512());
12303        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
12304        #[rustfmt::skip]
12305        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12306                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12307        assert_eq_m512i(r, e);
12308    }
12309
12310    #[simd_test(enable = "avx512bw,avx512vl")]
12311    unsafe fn test_mm256_mask_subs_epu16() {
12312        let a = _mm256_set1_epi16(1);
12313        let b = _mm256_set1_epi16(u16::MAX as i16);
12314        let r = _mm256_mask_subs_epu16(a, 0, a, b);
12315        assert_eq_m256i(r, a);
12316        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
12317        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12318        assert_eq_m256i(r, e);
12319    }
12320
12321    #[simd_test(enable = "avx512bw,avx512vl")]
12322    unsafe fn test_mm256_maskz_subs_epu16() {
12323        let a = _mm256_set1_epi16(1);
12324        let b = _mm256_set1_epi16(u16::MAX as i16);
12325        let r = _mm256_maskz_subs_epu16(0, a, b);
12326        assert_eq_m256i(r, _mm256_setzero_si256());
12327        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
12328        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12329        assert_eq_m256i(r, e);
12330    }
12331
12332    #[simd_test(enable = "avx512bw,avx512vl")]
12333    unsafe fn test_mm_mask_subs_epu16() {
12334        let a = _mm_set1_epi16(1);
12335        let b = _mm_set1_epi16(u16::MAX as i16);
12336        let r = _mm_mask_subs_epu16(a, 0, a, b);
12337        assert_eq_m128i(r, a);
12338        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
12339        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
12340        assert_eq_m128i(r, e);
12341    }
12342
12343    #[simd_test(enable = "avx512bw,avx512vl")]
12344    unsafe fn test_mm_maskz_subs_epu16() {
12345        let a = _mm_set1_epi16(1);
12346        let b = _mm_set1_epi16(u16::MAX as i16);
12347        let r = _mm_maskz_subs_epu16(0, a, b);
12348        assert_eq_m128i(r, _mm_setzero_si128());
12349        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
12350        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
12351        assert_eq_m128i(r, e);
12352    }
12353
12354    #[simd_test(enable = "avx512bw")]
12355    unsafe fn test_mm512_subs_epu8() {
12356        let a = _mm512_set1_epi8(1);
12357        let b = _mm512_set1_epi8(u8::MAX as i8);
12358        let r = _mm512_subs_epu8(a, b);
12359        let e = _mm512_set1_epi8(0);
12360        assert_eq_m512i(r, e);
12361    }
12362
12363    #[simd_test(enable = "avx512bw")]
12364    unsafe fn test_mm512_mask_subs_epu8() {
12365        let a = _mm512_set1_epi8(1);
12366        let b = _mm512_set1_epi8(u8::MAX as i8);
12367        let r = _mm512_mask_subs_epu8(a, 0, a, b);
12368        assert_eq_m512i(r, a);
12369        let r = _mm512_mask_subs_epu8(
12370            a,
12371            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12372            a,
12373            b,
12374        );
12375        #[rustfmt::skip]
12376        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12377                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12378                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12379                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12380        assert_eq_m512i(r, e);
12381    }
12382
12383    #[simd_test(enable = "avx512bw")]
12384    unsafe fn test_mm512_maskz_subs_epu8() {
12385        let a = _mm512_set1_epi8(1);
12386        let b = _mm512_set1_epi8(u8::MAX as i8);
12387        let r = _mm512_maskz_subs_epu8(0, a, b);
12388        assert_eq_m512i(r, _mm512_setzero_si512());
12389        let r = _mm512_maskz_subs_epu8(
12390            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12391            a,
12392            b,
12393        );
12394        #[rustfmt::skip]
12395        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12396                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12397                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12398                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12399        assert_eq_m512i(r, e);
12400    }
12401
12402    #[simd_test(enable = "avx512bw,avx512vl")]
12403    unsafe fn test_mm256_mask_subs_epu8() {
12404        let a = _mm256_set1_epi8(1);
12405        let b = _mm256_set1_epi8(u8::MAX as i8);
12406        let r = _mm256_mask_subs_epu8(a, 0, a, b);
12407        assert_eq_m256i(r, a);
12408        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
12409        #[rustfmt::skip]
12410        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12411                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12412        assert_eq_m256i(r, e);
12413    }
12414
12415    #[simd_test(enable = "avx512bw,avx512vl")]
12416    unsafe fn test_mm256_maskz_subs_epu8() {
12417        let a = _mm256_set1_epi8(1);
12418        let b = _mm256_set1_epi8(u8::MAX as i8);
12419        let r = _mm256_maskz_subs_epu8(0, a, b);
12420        assert_eq_m256i(r, _mm256_setzero_si256());
12421        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
12422        #[rustfmt::skip]
12423        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12424                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12425        assert_eq_m256i(r, e);
12426    }
12427
12428    #[simd_test(enable = "avx512bw,avx512vl")]
12429    unsafe fn test_mm_mask_subs_epu8() {
12430        let a = _mm_set1_epi8(1);
12431        let b = _mm_set1_epi8(u8::MAX as i8);
12432        let r = _mm_mask_subs_epu8(a, 0, a, b);
12433        assert_eq_m128i(r, a);
12434        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
12435        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12436        assert_eq_m128i(r, e);
12437    }
12438
12439    #[simd_test(enable = "avx512bw,avx512vl")]
12440    unsafe fn test_mm_maskz_subs_epu8() {
12441        let a = _mm_set1_epi8(1);
12442        let b = _mm_set1_epi8(u8::MAX as i8);
12443        let r = _mm_maskz_subs_epu8(0, a, b);
12444        assert_eq_m128i(r, _mm_setzero_si128());
12445        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
12446        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12447        assert_eq_m128i(r, e);
12448    }
12449
12450    #[simd_test(enable = "avx512bw")]
12451    unsafe fn test_mm512_subs_epi16() {
12452        let a = _mm512_set1_epi16(-1);
12453        let b = _mm512_set1_epi16(i16::MAX);
12454        let r = _mm512_subs_epi16(a, b);
12455        let e = _mm512_set1_epi16(i16::MIN);
12456        assert_eq_m512i(r, e);
12457    }
12458
12459    #[simd_test(enable = "avx512bw")]
12460    unsafe fn test_mm512_mask_subs_epi16() {
12461        let a = _mm512_set1_epi16(-1);
12462        let b = _mm512_set1_epi16(i16::MAX);
12463        let r = _mm512_mask_subs_epi16(a, 0, a, b);
12464        assert_eq_m512i(r, a);
12465        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
12466        #[rustfmt::skip]
12467        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12468                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12469        assert_eq_m512i(r, e);
12470    }
12471
12472    #[simd_test(enable = "avx512bw")]
12473    unsafe fn test_mm512_maskz_subs_epi16() {
12474        let a = _mm512_set1_epi16(-1);
12475        let b = _mm512_set1_epi16(i16::MAX);
12476        let r = _mm512_maskz_subs_epi16(0, a, b);
12477        assert_eq_m512i(r, _mm512_setzero_si512());
12478        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
12479        #[rustfmt::skip]
12480        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12481                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12482        assert_eq_m512i(r, e);
12483    }
12484
12485    #[simd_test(enable = "avx512bw,avx512vl")]
12486    unsafe fn test_mm256_mask_subs_epi16() {
12487        let a = _mm256_set1_epi16(-1);
12488        let b = _mm256_set1_epi16(i16::MAX);
12489        let r = _mm256_mask_subs_epi16(a, 0, a, b);
12490        assert_eq_m256i(r, a);
12491        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
12492        #[rustfmt::skip]
12493        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12494        assert_eq_m256i(r, e);
12495    }
12496
12497    #[simd_test(enable = "avx512bw,avx512vl")]
12498    unsafe fn test_mm256_maskz_subs_epi16() {
12499        let a = _mm256_set1_epi16(-1);
12500        let b = _mm256_set1_epi16(i16::MAX);
12501        let r = _mm256_maskz_subs_epi16(0, a, b);
12502        assert_eq_m256i(r, _mm256_setzero_si256());
12503        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
12504        #[rustfmt::skip]
12505        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12506        assert_eq_m256i(r, e);
12507    }
12508
12509    #[simd_test(enable = "avx512bw,avx512vl")]
12510    unsafe fn test_mm_mask_subs_epi16() {
12511        let a = _mm_set1_epi16(-1);
12512        let b = _mm_set1_epi16(i16::MAX);
12513        let r = _mm_mask_subs_epi16(a, 0, a, b);
12514        assert_eq_m128i(r, a);
12515        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
12516        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12517        assert_eq_m128i(r, e);
12518    }
12519
12520    #[simd_test(enable = "avx512bw,avx512vl")]
12521    unsafe fn test_mm_maskz_subs_epi16() {
12522        let a = _mm_set1_epi16(-1);
12523        let b = _mm_set1_epi16(i16::MAX);
12524        let r = _mm_maskz_subs_epi16(0, a, b);
12525        assert_eq_m128i(r, _mm_setzero_si128());
12526        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
12527        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
12528        assert_eq_m128i(r, e);
12529    }
12530
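    // Editor's sketch (not part of the original suite): the signed saturating
    // subtract exercised above should agree with Rust's scalar
    // `i16::saturating_sub` model on every lane; this cross-checks one case.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi16_scalar_model() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_subs_epi16(a, b);
        // (-1).saturating_sub(i16::MAX) saturates to i16::MIN.
        let e = _mm512_set1_epi16((-1i16).saturating_sub(i16::MAX));
        assert_eq_m512i(r, e);
    }
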
12531    #[simd_test(enable = "avx512bw")]
12532    unsafe fn test_mm512_subs_epi8() {
12533        let a = _mm512_set1_epi8(-1);
12534        let b = _mm512_set1_epi8(i8::MAX);
12535        let r = _mm512_subs_epi8(a, b);
12536        let e = _mm512_set1_epi8(i8::MIN);
12537        assert_eq_m512i(r, e);
12538    }
12539
12540    #[simd_test(enable = "avx512bw")]
12541    unsafe fn test_mm512_mask_subs_epi8() {
12542        let a = _mm512_set1_epi8(-1);
12543        let b = _mm512_set1_epi8(i8::MAX);
12544        let r = _mm512_mask_subs_epi8(a, 0, a, b);
12545        assert_eq_m512i(r, a);
12546        let r = _mm512_mask_subs_epi8(
12547            a,
12548            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12549            a,
12550            b,
12551        );
12552        #[rustfmt::skip]
12553        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12554                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12555                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12556                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12557        assert_eq_m512i(r, e);
12558    }
12559
12560    #[simd_test(enable = "avx512bw")]
12561    unsafe fn test_mm512_maskz_subs_epi8() {
12562        let a = _mm512_set1_epi8(-1);
12563        let b = _mm512_set1_epi8(i8::MAX);
12564        let r = _mm512_maskz_subs_epi8(0, a, b);
12565        assert_eq_m512i(r, _mm512_setzero_si512());
12566        let r = _mm512_maskz_subs_epi8(
12567            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
12568            a,
12569            b,
12570        );
12571        #[rustfmt::skip]
12572        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12573                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12574                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12575                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12576        assert_eq_m512i(r, e);
12577    }
12578
12579    #[simd_test(enable = "avx512bw,avx512vl")]
12580    unsafe fn test_mm256_mask_subs_epi8() {
12581        let a = _mm256_set1_epi8(-1);
12582        let b = _mm256_set1_epi8(i8::MAX);
12583        let r = _mm256_mask_subs_epi8(a, 0, a, b);
12584        assert_eq_m256i(r, a);
12585        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
12586        #[rustfmt::skip]
12587        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12588                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12589        assert_eq_m256i(r, e);
12590    }
12591
12592    #[simd_test(enable = "avx512bw,avx512vl")]
12593    unsafe fn test_mm256_maskz_subs_epi8() {
12594        let a = _mm256_set1_epi8(-1);
12595        let b = _mm256_set1_epi8(i8::MAX);
12596        let r = _mm256_maskz_subs_epi8(0, a, b);
12597        assert_eq_m256i(r, _mm256_setzero_si256());
12598        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
12599        #[rustfmt::skip]
12600        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12601                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12602        assert_eq_m256i(r, e);
12603    }
12604
12605    #[simd_test(enable = "avx512bw,avx512vl")]
12606    unsafe fn test_mm_mask_subs_epi8() {
12607        let a = _mm_set1_epi8(-1);
12608        let b = _mm_set1_epi8(i8::MAX);
12609        let r = _mm_mask_subs_epi8(a, 0, a, b);
12610        assert_eq_m128i(r, a);
12611        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
12612        #[rustfmt::skip]
12613        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12614        assert_eq_m128i(r, e);
12615    }
12616
12617    #[simd_test(enable = "avx512bw,avx512vl")]
12618    unsafe fn test_mm_maskz_subs_epi8() {
12619        let a = _mm_set1_epi8(-1);
12620        let b = _mm_set1_epi8(i8::MAX);
12621        let r = _mm_maskz_subs_epi8(0, a, b);
12622        assert_eq_m128i(r, _mm_setzero_si128());
12623        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
12624        #[rustfmt::skip]
12625        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
12626        assert_eq_m128i(r, e);
12627    }
12628
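    // `mulhi` keeps the high 16 bits of each 32-bit product, so 1 * 1 = 1
    // yields 0 in every lane below; a product must reach 2^16 to be visible,
    // e.g. 0x8000 * 2 = 0x10000 gives a high half of 1 in the unsigned form.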
12629    #[simd_test(enable = "avx512bw")]
12630    unsafe fn test_mm512_mulhi_epu16() {
12631        let a = _mm512_set1_epi16(1);
12632        let b = _mm512_set1_epi16(1);
12633        let r = _mm512_mulhi_epu16(a, b);
12634        let e = _mm512_set1_epi16(0);
12635        assert_eq_m512i(r, e);
12636    }
12637
12638    #[simd_test(enable = "avx512bw")]
12639    unsafe fn test_mm512_mask_mulhi_epu16() {
12640        let a = _mm512_set1_epi16(1);
12641        let b = _mm512_set1_epi16(1);
12642        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
12643        assert_eq_m512i(r, a);
12644        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
12645        #[rustfmt::skip]
12646        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12647                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12648        assert_eq_m512i(r, e);
12649    }
12650
12651    #[simd_test(enable = "avx512bw")]
12652    unsafe fn test_mm512_maskz_mulhi_epu16() {
12653        let a = _mm512_set1_epi16(1);
12654        let b = _mm512_set1_epi16(1);
12655        let r = _mm512_maskz_mulhi_epu16(0, a, b);
12656        assert_eq_m512i(r, _mm512_setzero_si512());
12657        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
12658        #[rustfmt::skip]
12659        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12660                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12661        assert_eq_m512i(r, e);
12662    }
12663
12664    #[simd_test(enable = "avx512bw,avx512vl")]
12665    unsafe fn test_mm256_mask_mulhi_epu16() {
12666        let a = _mm256_set1_epi16(1);
12667        let b = _mm256_set1_epi16(1);
12668        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
12669        assert_eq_m256i(r, a);
12670        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
12671        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12672        assert_eq_m256i(r, e);
12673    }
12674
12675    #[simd_test(enable = "avx512bw,avx512vl")]
12676    unsafe fn test_mm256_maskz_mulhi_epu16() {
12677        let a = _mm256_set1_epi16(1);
12678        let b = _mm256_set1_epi16(1);
12679        let r = _mm256_maskz_mulhi_epu16(0, a, b);
12680        assert_eq_m256i(r, _mm256_setzero_si256());
12681        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
12682        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12683        assert_eq_m256i(r, e);
12684    }
12685
12686    #[simd_test(enable = "avx512bw,avx512vl")]
12687    unsafe fn test_mm_mask_mulhi_epu16() {
12688        let a = _mm_set1_epi16(1);
12689        let b = _mm_set1_epi16(1);
12690        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
12691        assert_eq_m128i(r, a);
12692        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
12693        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
12694        assert_eq_m128i(r, e);
12695    }
12696
12697    #[simd_test(enable = "avx512bw,avx512vl")]
12698    unsafe fn test_mm_maskz_mulhi_epu16() {
12699        let a = _mm_set1_epi16(1);
12700        let b = _mm_set1_epi16(1);
12701        let r = _mm_maskz_mulhi_epu16(0, a, b);
12702        assert_eq_m128i(r, _mm_setzero_si128());
12703        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
12704        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
12705        assert_eq_m128i(r, e);
12706    }
12707
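    // The signed form below follows the same pattern: the high half of the
    // sign-extended 32-bit product of two 1s is still 0.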
12708    #[simd_test(enable = "avx512bw")]
12709    unsafe fn test_mm512_mulhi_epi16() {
12710        let a = _mm512_set1_epi16(1);
12711        let b = _mm512_set1_epi16(1);
12712        let r = _mm512_mulhi_epi16(a, b);
12713        let e = _mm512_set1_epi16(0);
12714        assert_eq_m512i(r, e);
12715    }
12716
12717    #[simd_test(enable = "avx512bw")]
12718    unsafe fn test_mm512_mask_mulhi_epi16() {
12719        let a = _mm512_set1_epi16(1);
12720        let b = _mm512_set1_epi16(1);
12721        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
12722        assert_eq_m512i(r, a);
12723        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
12724        #[rustfmt::skip]
12725        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12726                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12727        assert_eq_m512i(r, e);
12728    }
12729
12730    #[simd_test(enable = "avx512bw")]
12731    unsafe fn test_mm512_maskz_mulhi_epi16() {
12732        let a = _mm512_set1_epi16(1);
12733        let b = _mm512_set1_epi16(1);
12734        let r = _mm512_maskz_mulhi_epi16(0, a, b);
12735        assert_eq_m512i(r, _mm512_setzero_si512());
12736        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
12737        #[rustfmt::skip]
12738        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12739                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12740        assert_eq_m512i(r, e);
12741    }
12742
12743    #[simd_test(enable = "avx512bw,avx512vl")]
12744    unsafe fn test_mm256_mask_mulhi_epi16() {
12745        let a = _mm256_set1_epi16(1);
12746        let b = _mm256_set1_epi16(1);
12747        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
12748        assert_eq_m256i(r, a);
12749        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
12750        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12751        assert_eq_m256i(r, e);
12752    }
12753
12754    #[simd_test(enable = "avx512bw,avx512vl")]
12755    unsafe fn test_mm256_maskz_mulhi_epi16() {
12756        let a = _mm256_set1_epi16(1);
12757        let b = _mm256_set1_epi16(1);
12758        let r = _mm256_maskz_mulhi_epi16(0, a, b);
12759        assert_eq_m256i(r, _mm256_setzero_si256());
12760        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
12761        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12762        assert_eq_m256i(r, e);
12763    }
12764
12765    #[simd_test(enable = "avx512bw,avx512vl")]
12766    unsafe fn test_mm_mask_mulhi_epi16() {
12767        let a = _mm_set1_epi16(1);
12768        let b = _mm_set1_epi16(1);
12769        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
12770        assert_eq_m128i(r, a);
12771        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
12772        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
12773        assert_eq_m128i(r, e);
12774    }
12775
12776    #[simd_test(enable = "avx512bw,avx512vl")]
12777    unsafe fn test_mm_maskz_mulhi_epi16() {
12778        let a = _mm_set1_epi16(1);
12779        let b = _mm_set1_epi16(1);
12780        let r = _mm_maskz_mulhi_epi16(0, a, b);
12781        assert_eq_m128i(r, _mm_setzero_si128());
12782        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
12783        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
12784        assert_eq_m128i(r, e);
12785    }
12786
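    // `mulhrs` computes ((a * b) >> 14) + 1 and then shifts right once more,
    // i.e. (a * b + 0x4000) >> 15 per lane. For a = b = 1 that is
    // (1 + 0x4000) >> 15 = 0, which is why the unmasked result is all zeros.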
12787    #[simd_test(enable = "avx512bw")]
12788    unsafe fn test_mm512_mulhrs_epi16() {
12789        let a = _mm512_set1_epi16(1);
12790        let b = _mm512_set1_epi16(1);
12791        let r = _mm512_mulhrs_epi16(a, b);
12792        let e = _mm512_set1_epi16(0);
12793        assert_eq_m512i(r, e);
12794    }
12795
12796    #[simd_test(enable = "avx512bw")]
12797    unsafe fn test_mm512_mask_mulhrs_epi16() {
12798        let a = _mm512_set1_epi16(1);
12799        let b = _mm512_set1_epi16(1);
12800        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
12801        assert_eq_m512i(r, a);
12802        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
12803        #[rustfmt::skip]
12804        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12805                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12806        assert_eq_m512i(r, e);
12807    }
12808
12809    #[simd_test(enable = "avx512bw")]
12810    unsafe fn test_mm512_maskz_mulhrs_epi16() {
12811        let a = _mm512_set1_epi16(1);
12812        let b = _mm512_set1_epi16(1);
12813        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
12814        assert_eq_m512i(r, _mm512_setzero_si512());
12815        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
12816        #[rustfmt::skip]
12817        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12818                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12819        assert_eq_m512i(r, e);
12820    }
12821
12822    #[simd_test(enable = "avx512bw,avx512vl")]
12823    unsafe fn test_mm256_mask_mulhrs_epi16() {
12824        let a = _mm256_set1_epi16(1);
12825        let b = _mm256_set1_epi16(1);
12826        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
12827        assert_eq_m256i(r, a);
12828        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
12829        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
12830        assert_eq_m256i(r, e);
12831    }
12832
12833    #[simd_test(enable = "avx512bw,avx512vl")]
12834    unsafe fn test_mm256_maskz_mulhrs_epi16() {
12835        let a = _mm256_set1_epi16(1);
12836        let b = _mm256_set1_epi16(1);
12837        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
12838        assert_eq_m256i(r, _mm256_setzero_si256());
12839        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
12840        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
12841        assert_eq_m256i(r, e);
12842    }
12843
12844    #[simd_test(enable = "avx512bw,avx512vl")]
12845    unsafe fn test_mm_mask_mulhrs_epi16() {
12846        let a = _mm_set1_epi16(1);
12847        let b = _mm_set1_epi16(1);
12848        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
12849        assert_eq_m128i(r, a);
12850        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
12851        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
12852        assert_eq_m128i(r, e);
12853    }
12854
12855    #[simd_test(enable = "avx512bw,avx512vl")]
12856    unsafe fn test_mm_maskz_mulhrs_epi16() {
12857        let a = _mm_set1_epi16(1);
12858        let b = _mm_set1_epi16(1);
12859        let r = _mm_maskz_mulhrs_epi16(0, a, b);
12860        assert_eq_m128i(r, _mm_setzero_si128());
12861        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
12862        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
12863        assert_eq_m128i(r, e);
12864    }
12865
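    // `mullo` keeps the low 16 bits of each product instead, so 1 * 1
    // stays 1 and the masked-off lanes are the only ones that differ.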
12866    #[simd_test(enable = "avx512bw")]
12867    unsafe fn test_mm512_mullo_epi16() {
12868        let a = _mm512_set1_epi16(1);
12869        let b = _mm512_set1_epi16(1);
12870        let r = _mm512_mullo_epi16(a, b);
12871        let e = _mm512_set1_epi16(1);
12872        assert_eq_m512i(r, e);
12873    }
12874
12875    #[simd_test(enable = "avx512bw")]
12876    unsafe fn test_mm512_mask_mullo_epi16() {
12877        let a = _mm512_set1_epi16(1);
12878        let b = _mm512_set1_epi16(1);
12879        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
12880        assert_eq_m512i(r, a);
12881        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
12882        #[rustfmt::skip]
12883        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12884                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
12885        assert_eq_m512i(r, e);
12886    }
12887
12888    #[simd_test(enable = "avx512bw")]
12889    unsafe fn test_mm512_maskz_mullo_epi16() {
12890        let a = _mm512_set1_epi16(1);
12891        let b = _mm512_set1_epi16(1);
12892        let r = _mm512_maskz_mullo_epi16(0, a, b);
12893        assert_eq_m512i(r, _mm512_setzero_si512());
12894        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
12895        #[rustfmt::skip]
12896        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
12897                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
12898        assert_eq_m512i(r, e);
12899    }
12900
12901    #[simd_test(enable = "avx512bw,avx512vl")]
12902    unsafe fn test_mm256_mask_mullo_epi16() {
12903        let a = _mm256_set1_epi16(1);
12904        let b = _mm256_set1_epi16(1);
12905        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
12906        assert_eq_m256i(r, a);
12907        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
12908        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
12909        assert_eq_m256i(r, e);
12910    }
12911
12912    #[simd_test(enable = "avx512bw,avx512vl")]
12913    unsafe fn test_mm256_maskz_mullo_epi16() {
12914        let a = _mm256_set1_epi16(1);
12915        let b = _mm256_set1_epi16(1);
12916        let r = _mm256_maskz_mullo_epi16(0, a, b);
12917        assert_eq_m256i(r, _mm256_setzero_si256());
12918        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
12919        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
12920        assert_eq_m256i(r, e);
12921    }
12922
12923    #[simd_test(enable = "avx512bw,avx512vl")]
12924    unsafe fn test_mm_mask_mullo_epi16() {
12925        let a = _mm_set1_epi16(1);
12926        let b = _mm_set1_epi16(1);
12927        let r = _mm_mask_mullo_epi16(a, 0, a, b);
12928        assert_eq_m128i(r, a);
12929        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
12930        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
12931        assert_eq_m128i(r, e);
12932    }
12933
12934    #[simd_test(enable = "avx512bw,avx512vl")]
12935    unsafe fn test_mm_maskz_mullo_epi16() {
12936        let a = _mm_set1_epi16(1);
12937        let b = _mm_set1_epi16(1);
12938        let r = _mm_maskz_mullo_epi16(0, a, b);
12939        assert_eq_m128i(r, _mm_setzero_si128());
12940        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
12941        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
12942        assert_eq_m128i(r, e);
12943    }
12944
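    // The max/min tests below pair an ascending ramp (0..15) with a
    // descending one (15..0), so each lane compares i against 15 - i:
    // max yields 15, 14, .., 8, 8, .., 15 and min yields 0, 1, .., 7, 7, .., 0.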
12945    #[simd_test(enable = "avx512bw")]
12946    unsafe fn test_mm512_max_epu16() {
12947        #[rustfmt::skip]
12948        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12949                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12950        #[rustfmt::skip]
12951        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12952                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12953        let r = _mm512_max_epu16(a, b);
12954        #[rustfmt::skip]
12955        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
12956                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
12957        assert_eq_m512i(r, e);
12958    }
12959
12960    #[simd_test(enable = "avx512bw")]
12961    unsafe fn test_mm512_mask_max_epu16() {
12962        #[rustfmt::skip]
12963        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12964                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12965        #[rustfmt::skip]
12966        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12967                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12968        let r = _mm512_mask_max_epu16(a, 0, a, b);
12969        assert_eq_m512i(r, a);
12970        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
12971        #[rustfmt::skip]
12972        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12973                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12974        assert_eq_m512i(r, e);
12975    }
12976
12977    #[simd_test(enable = "avx512bw")]
12978    unsafe fn test_mm512_maskz_max_epu16() {
12979        #[rustfmt::skip]
12980        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12981                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12982        #[rustfmt::skip]
12983        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12984                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12985        let r = _mm512_maskz_max_epu16(0, a, b);
12986        assert_eq_m512i(r, _mm512_setzero_si512());
12987        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
12988        #[rustfmt::skip]
12989        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
12990                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
12991        assert_eq_m512i(r, e);
12992    }
12993
12994    #[simd_test(enable = "avx512bw,avx512vl")]
12995    unsafe fn test_mm256_mask_max_epu16() {
12996        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12997        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12998        let r = _mm256_mask_max_epu16(a, 0, a, b);
12999        assert_eq_m256i(r, a);
13000        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
13001        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13002        assert_eq_m256i(r, e);
13003    }
13004
13005    #[simd_test(enable = "avx512bw,avx512vl")]
13006    unsafe fn test_mm256_maskz_max_epu16() {
13007        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13008        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13009        let r = _mm256_maskz_max_epu16(0, a, b);
13010        assert_eq_m256i(r, _mm256_setzero_si256());
13011        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
13012        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13013        assert_eq_m256i(r, e);
13014    }
13015
13016    #[simd_test(enable = "avx512bw,avx512vl")]
13017    unsafe fn test_mm_mask_max_epu16() {
13018        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13019        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13020        let r = _mm_mask_max_epu16(a, 0, a, b);
13021        assert_eq_m128i(r, a);
13022        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
13023        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13024        assert_eq_m128i(r, e);
13025    }
13026
13027    #[simd_test(enable = "avx512bw,avx512vl")]
13028    unsafe fn test_mm_maskz_max_epu16() {
13029        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13030        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13031        let r = _mm_maskz_max_epu16(0, a, b);
13032        assert_eq_m128i(r, _mm_setzero_si128());
13033        let r = _mm_maskz_max_epu16(0b00001111, a, b);
13034        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
13035        assert_eq_m128i(r, e);
13036    }
13037
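    // The byte variants repeat the same 16-element ramp across all four
    // 16-byte groups of the 512-bit vector (and both groups of the 256-bit one).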
13038    #[simd_test(enable = "avx512bw")]
13039    unsafe fn test_mm512_max_epu8() {
13040        #[rustfmt::skip]
13041        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13042                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13043                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13044                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13045        #[rustfmt::skip]
13046        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13047                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13048                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13049                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13050        let r = _mm512_max_epu8(a, b);
13051        #[rustfmt::skip]
13052        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
13053                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
13054                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
13055                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
13056        assert_eq_m512i(r, e);
13057    }
13058
13059    #[simd_test(enable = "avx512bw")]
13060    unsafe fn test_mm512_mask_max_epu8() {
13061        #[rustfmt::skip]
13062        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13063                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13064                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13065                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13066        #[rustfmt::skip]
13067        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13068                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13069                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13070                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13071        let r = _mm512_mask_max_epu8(a, 0, a, b);
13072        assert_eq_m512i(r, a);
13073        let r = _mm512_mask_max_epu8(
13074            a,
13075            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13076            a,
13077            b,
13078        );
13079        #[rustfmt::skip]
13080        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13081                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13082                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13083                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13084        assert_eq_m512i(r, e);
13085    }
13086
13087    #[simd_test(enable = "avx512bw")]
13088    unsafe fn test_mm512_maskz_max_epu8() {
13089        #[rustfmt::skip]
13090        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13091                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13092                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13093                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13094        #[rustfmt::skip]
13095        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13096                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13097                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13098                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13099        let r = _mm512_maskz_max_epu8(0, a, b);
13100        assert_eq_m512i(r, _mm512_setzero_si512());
13101        let r = _mm512_maskz_max_epu8(
13102            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13103            a,
13104            b,
13105        );
13106        #[rustfmt::skip]
13107        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13108                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13109                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13110                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13111        assert_eq_m512i(r, e);
13112    }
13113
13114    #[simd_test(enable = "avx512bw,avx512vl")]
13115    unsafe fn test_mm256_mask_max_epu8() {
13116        #[rustfmt::skip]
13117        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13118                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13119        #[rustfmt::skip]
13120        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13121                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13122        let r = _mm256_mask_max_epu8(a, 0, a, b);
13123        assert_eq_m256i(r, a);
13124        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
13125        #[rustfmt::skip]
13126        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13127                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13128        assert_eq_m256i(r, e);
13129    }
13130
13131    #[simd_test(enable = "avx512bw,avx512vl")]
13132    unsafe fn test_mm256_maskz_max_epu8() {
13133        #[rustfmt::skip]
13134        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13135                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13136        #[rustfmt::skip]
13137        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13138                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13139        let r = _mm256_maskz_max_epu8(0, a, b);
13140        assert_eq_m256i(r, _mm256_setzero_si256());
13141        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
13142        #[rustfmt::skip]
13143        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13144                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13145        assert_eq_m256i(r, e);
13146    }
13147
13148    #[simd_test(enable = "avx512bw,avx512vl")]
13149    unsafe fn test_mm_mask_max_epu8() {
13150        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13151        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13152        let r = _mm_mask_max_epu8(a, 0, a, b);
13153        assert_eq_m128i(r, a);
13154        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
13155        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13156        assert_eq_m128i(r, e);
13157    }
13158
13159    #[simd_test(enable = "avx512bw,avx512vl")]
13160    unsafe fn test_mm_maskz_max_epu8() {
13161        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13162        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13163        let r = _mm_maskz_max_epu8(0, a, b);
13164        assert_eq_m128i(r, _mm_setzero_si128());
13165        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
13166        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13167        assert_eq_m128i(r, e);
13168    }
13169
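    // Every test value here fits in 0..=15, so the signed max/min variants
    // below are expected to produce the same lanes as the unsigned ones above.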
13170    #[simd_test(enable = "avx512bw")]
13171    unsafe fn test_mm512_max_epi16() {
13172        #[rustfmt::skip]
13173        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13174                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13175        #[rustfmt::skip]
13176        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13177                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13178        let r = _mm512_max_epi16(a, b);
13179        #[rustfmt::skip]
13180        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
13181                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
13182        assert_eq_m512i(r, e);
13183    }
13184
13185    #[simd_test(enable = "avx512bw")]
13186    unsafe fn test_mm512_mask_max_epi16() {
13187        #[rustfmt::skip]
13188        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13189                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13190        #[rustfmt::skip]
13191        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13192                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13193        let r = _mm512_mask_max_epi16(a, 0, a, b);
13194        assert_eq_m512i(r, a);
13195        let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
13196        #[rustfmt::skip]
13197        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13198                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13199        assert_eq_m512i(r, e);
13200    }
13201
13202    #[simd_test(enable = "avx512bw")]
13203    unsafe fn test_mm512_maskz_max_epi16() {
13204        #[rustfmt::skip]
13205        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13206                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13207        #[rustfmt::skip]
13208        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13209                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13210        let r = _mm512_maskz_max_epi16(0, a, b);
13211        assert_eq_m512i(r, _mm512_setzero_si512());
13212        let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
13213        #[rustfmt::skip]
13214        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13215                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13216        assert_eq_m512i(r, e);
13217    }
13218
13219    #[simd_test(enable = "avx512bw,avx512vl")]
13220    unsafe fn test_mm256_mask_max_epi16() {
13221        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13222        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13223        let r = _mm256_mask_max_epi16(a, 0, a, b);
13224        assert_eq_m256i(r, a);
13225        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
13226        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13227        assert_eq_m256i(r, e);
13228    }
13229
13230    #[simd_test(enable = "avx512bw,avx512vl")]
13231    unsafe fn test_mm256_maskz_max_epi16() {
13232        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13233        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13234        let r = _mm256_maskz_max_epi16(0, a, b);
13235        assert_eq_m256i(r, _mm256_setzero_si256());
13236        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
13237        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13238        assert_eq_m256i(r, e);
13239    }
13240
13241    #[simd_test(enable = "avx512bw,avx512vl")]
13242    unsafe fn test_mm_mask_max_epi16() {
13243        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13244        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13245        let r = _mm_mask_max_epi16(a, 0, a, b);
13246        assert_eq_m128i(r, a);
13247        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
13248        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13249        assert_eq_m128i(r, e);
13250    }
13251
13252    #[simd_test(enable = "avx512bw,avx512vl")]
13253    unsafe fn test_mm_maskz_max_epi16() {
13254        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13255        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13256        let r = _mm_maskz_max_epi16(0, a, b);
13257        assert_eq_m128i(r, _mm_setzero_si128());
13258        let r = _mm_maskz_max_epi16(0b00001111, a, b);
13259        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
13260        assert_eq_m128i(r, e);
13261    }
13262
13263    #[simd_test(enable = "avx512bw")]
13264    unsafe fn test_mm512_max_epi8() {
13265        #[rustfmt::skip]
13266        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13267                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13268                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13269                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13270        #[rustfmt::skip]
13271        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13272                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13273                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13274                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13275        let r = _mm512_max_epi8(a, b);
13276        #[rustfmt::skip]
13277        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
13278                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
13279                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
13280                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
13281        assert_eq_m512i(r, e);
13282    }
13283
13284    #[simd_test(enable = "avx512bw")]
13285    unsafe fn test_mm512_mask_max_epi8() {
13286        #[rustfmt::skip]
13287        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13288                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13289                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13290                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13291        #[rustfmt::skip]
13292        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13293                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13294                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13295                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13296        let r = _mm512_mask_max_epi8(a, 0, a, b);
13297        assert_eq_m512i(r, a);
13298        let r = _mm512_mask_max_epi8(
13299            a,
13300            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13301            a,
13302            b,
13303        );
13304        #[rustfmt::skip]
13305        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13306                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13307                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13308                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13309        assert_eq_m512i(r, e);
13310    }
13311
13312    #[simd_test(enable = "avx512bw")]
13313    unsafe fn test_mm512_maskz_max_epi8() {
13314        #[rustfmt::skip]
13315        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13316                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13317                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13318                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13319        #[rustfmt::skip]
13320        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13321                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13322                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13323                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13324        let r = _mm512_maskz_max_epi8(0, a, b);
13325        assert_eq_m512i(r, _mm512_setzero_si512());
13326        let r = _mm512_maskz_max_epi8(
13327            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13328            a,
13329            b,
13330        );
13331        #[rustfmt::skip]
13332        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13333                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13334                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13335                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13336        assert_eq_m512i(r, e);
13337    }
13338
13339    #[simd_test(enable = "avx512bw,avx512vl")]
13340    unsafe fn test_mm256_mask_max_epi8() {
13341        #[rustfmt::skip]
13342        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13343                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13344        #[rustfmt::skip]
13345        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13346                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13347        let r = _mm256_mask_max_epi8(a, 0, a, b);
13348        assert_eq_m256i(r, a);
13349        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
13350        #[rustfmt::skip]
13351        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13352                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13353        assert_eq_m256i(r, e);
13354    }
13355
13356    #[simd_test(enable = "avx512bw,avx512vl")]
13357    unsafe fn test_mm256_maskz_max_epi8() {
13358        #[rustfmt::skip]
13359        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13360                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13361        #[rustfmt::skip]
13362        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13363                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13364        let r = _mm256_maskz_max_epi8(0, a, b);
13365        assert_eq_m256i(r, _mm256_setzero_si256());
13366        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
13367        #[rustfmt::skip]
13368        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
13369                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13370        assert_eq_m256i(r, e);
13371    }
13372
13373    #[simd_test(enable = "avx512bw,avx512vl")]
13374    unsafe fn test_mm_mask_max_epi8() {
13375        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13376        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13377        let r = _mm_mask_max_epi8(a, 0, a, b);
13378        assert_eq_m128i(r, a);
13379        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
13380        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13381        assert_eq_m128i(r, e);
13382    }
13383
13384    #[simd_test(enable = "avx512bw,avx512vl")]
13385    unsafe fn test_mm_maskz_max_epi8() {
13386        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13387        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13388        let r = _mm_maskz_max_epi8(0, a, b);
13389        assert_eq_m128i(r, _mm_setzero_si128());
13390        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
13391        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
13392        assert_eq_m128i(r, e);
13393    }
13394
13395    #[simd_test(enable = "avx512bw")]
13396    unsafe fn test_mm512_min_epu16() {
13397        #[rustfmt::skip]
13398        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13399                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13400        #[rustfmt::skip]
13401        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13402                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13403        let r = _mm512_min_epu16(a, b);
13404        #[rustfmt::skip]
13405        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13406                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13407        assert_eq_m512i(r, e);
13408    }
13409
13410    #[simd_test(enable = "avx512bw")]
13411    unsafe fn test_mm512_mask_min_epu16() {
13412        #[rustfmt::skip]
13413        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13414                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13415        #[rustfmt::skip]
13416        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13417                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13418        let r = _mm512_mask_min_epu16(a, 0, a, b);
13419        assert_eq_m512i(r, a);
13420        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
13421        #[rustfmt::skip]
13422        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13423                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13424        assert_eq_m512i(r, e);
13425    }
13426
13427    #[simd_test(enable = "avx512bw")]
13428    unsafe fn test_mm512_maskz_min_epu16() {
13429        #[rustfmt::skip]
13430        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13431                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13432        #[rustfmt::skip]
13433        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13434                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13435        let r = _mm512_maskz_min_epu16(0, a, b);
13436        assert_eq_m512i(r, _mm512_setzero_si512());
13437        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
13438        #[rustfmt::skip]
13439        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13440                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13441        assert_eq_m512i(r, e);
13442    }
13443
13444    #[simd_test(enable = "avx512bw,avx512vl")]
13445    unsafe fn test_mm256_mask_min_epu16() {
13446        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13447        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13448        let r = _mm256_mask_min_epu16(a, 0, a, b);
13449        assert_eq_m256i(r, a);
13450        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
13451        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13452        assert_eq_m256i(r, e);
13453    }
13454
13455    #[simd_test(enable = "avx512bw,avx512vl")]
13456    unsafe fn test_mm256_maskz_min_epu16() {
13457        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13458        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13459        let r = _mm256_maskz_min_epu16(0, a, b);
13460        assert_eq_m256i(r, _mm256_setzero_si256());
13461        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
13462        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13463        assert_eq_m256i(r, e);
13464    }
13465
13466    #[simd_test(enable = "avx512bw,avx512vl")]
13467    unsafe fn test_mm_mask_min_epu16() {
13468        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13469        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13470        let r = _mm_mask_min_epu16(a, 0, a, b);
13471        assert_eq_m128i(r, a);
13472        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
13473        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
13474        assert_eq_m128i(r, e);
13475    }
13476
13477    #[simd_test(enable = "avx512bw,avx512vl")]
13478    unsafe fn test_mm_maskz_min_epu16() {
13479        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13480        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13481        let r = _mm_maskz_min_epu16(0, a, b);
13482        assert_eq_m128i(r, _mm_setzero_si128());
13483        let r = _mm_maskz_min_epu16(0b00001111, a, b);
13484        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
13485        assert_eq_m128i(r, e);
13486    }
13487
13488    #[simd_test(enable = "avx512bw")]
13489    unsafe fn test_mm512_min_epu8() {
13490        #[rustfmt::skip]
13491        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13492                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13493                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13494                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13495        #[rustfmt::skip]
13496        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13497                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13498                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13499                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13500        let r = _mm512_min_epu8(a, b);
13501        #[rustfmt::skip]
13502        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13503                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13504                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13505                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13506        assert_eq_m512i(r, e);
13507    }
13508
13509    #[simd_test(enable = "avx512bw")]
13510    unsafe fn test_mm512_mask_min_epu8() {
13511        #[rustfmt::skip]
13512        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13513                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13514                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13515                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13516        #[rustfmt::skip]
13517        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13518                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13519                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13520                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13521        let r = _mm512_mask_min_epu8(a, 0, a, b);
13522        assert_eq_m512i(r, a);
13523        let r = _mm512_mask_min_epu8(
13524            a,
13525            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13526            a,
13527            b,
13528        );
13529        #[rustfmt::skip]
13530        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13531                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13532                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13533                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13534        assert_eq_m512i(r, e);
13535    }
13536
13537    #[simd_test(enable = "avx512bw")]
13538    unsafe fn test_mm512_maskz_min_epu8() {
13539        #[rustfmt::skip]
13540        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13541                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13542                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13543                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13544        #[rustfmt::skip]
13545        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13546                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13547                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13548                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13549        let r = _mm512_maskz_min_epu8(0, a, b);
13550        assert_eq_m512i(r, _mm512_setzero_si512());
13551        let r = _mm512_maskz_min_epu8(
13552            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13553            a,
13554            b,
13555        );
13556        #[rustfmt::skip]
13557        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13558                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13559                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13560                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13561        assert_eq_m512i(r, e);
13562    }
13563
13564    #[simd_test(enable = "avx512bw,avx512vl")]
13565    unsafe fn test_mm256_mask_min_epu8() {
13566        #[rustfmt::skip]
13567        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13568                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13569        #[rustfmt::skip]
13570        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13571                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13572        let r = _mm256_mask_min_epu8(a, 0, a, b);
13573        assert_eq_m256i(r, a);
13574        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
13575        #[rustfmt::skip]
13576        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13577                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13578        assert_eq_m256i(r, e);
13579    }
13580
13581    #[simd_test(enable = "avx512bw,avx512vl")]
13582    unsafe fn test_mm256_maskz_min_epu8() {
13583        #[rustfmt::skip]
13584        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13585                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13586        #[rustfmt::skip]
13587        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13588                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13589        let r = _mm256_maskz_min_epu8(0, a, b);
13590        assert_eq_m256i(r, _mm256_setzero_si256());
13591        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
13592        #[rustfmt::skip]
13593        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13594                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13595        assert_eq_m256i(r, e);
13596    }
13597
13598    #[simd_test(enable = "avx512f,avx512vl")]
13599    unsafe fn test_mm_mask_min_epu8() {
13600        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13601        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13602        let r = _mm_mask_min_epu8(a, 0, a, b);
13603        assert_eq_m128i(r, a);
13604        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
13605        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13606        assert_eq_m128i(r, e);
13607    }
13608
13609    #[simd_test(enable = "avx512f,avx512vl")]
13610    unsafe fn test_mm_maskz_min_epu8() {
13611        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13612        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13613        let r = _mm_maskz_min_epu8(0, a, b);
13614        assert_eq_m128i(r, _mm_setzero_si128());
13615        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
13616        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13617        assert_eq_m128i(r, e);
13618    }
13619
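    // Signed 16-bit minimum; with a = 0..15 and b = 15..0 in each row, the
    // elementwise result is min(i, 15 - i), symmetric around the row middle.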
13620    #[simd_test(enable = "avx512bw")]
13621    unsafe fn test_mm512_min_epi16() {
13622        #[rustfmt::skip]
13623        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13624                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13625        #[rustfmt::skip]
13626        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13627                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13628        let r = _mm512_min_epi16(a, b);
13629        #[rustfmt::skip]
13630        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13631                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13632        assert_eq_m512i(r, e);
13633    }
13634
13635    #[simd_test(enable = "avx512f")]
13636    unsafe fn test_mm512_mask_min_epi16() {
13637        #[rustfmt::skip]
13638        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13639                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13640        #[rustfmt::skip]
13641        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13642                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13643        let r = _mm512_mask_min_epi16(a, 0, a, b);
13644        assert_eq_m512i(r, a);
13645        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
13646        #[rustfmt::skip]
13647        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13648                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13649        assert_eq_m512i(r, e);
13650    }
13651
13652    #[simd_test(enable = "avx512f")]
13653    unsafe fn test_mm512_maskz_min_epi16() {
13654        #[rustfmt::skip]
13655        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13656                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13657        #[rustfmt::skip]
13658        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13659                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13660        let r = _mm512_maskz_min_epi16(0, a, b);
13661        assert_eq_m512i(r, _mm512_setzero_si512());
13662        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
13663        #[rustfmt::skip]
13664        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13665                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13666        assert_eq_m512i(r, e);
13667    }
13668
13669    #[simd_test(enable = "avx512f,avx512vl")]
13670    unsafe fn test_mm256_mask_min_epi16() {
13671        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13672        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13673        let r = _mm256_mask_min_epi16(a, 0, a, b);
13674        assert_eq_m256i(r, a);
13675        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
13676        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13677        assert_eq_m256i(r, e);
13678    }
13679
13680    #[simd_test(enable = "avx512f,avx512vl")]
13681    unsafe fn test_mm256_maskz_min_epi16() {
13682        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13683        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13684        let r = _mm256_maskz_min_epi16(0, a, b);
13685        assert_eq_m256i(r, _mm256_setzero_si256());
13686        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
13687        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13688        assert_eq_m256i(r, e);
13689    }
13690
13691    #[simd_test(enable = "avx512f,avx512vl")]
13692    unsafe fn test_mm_mask_min_epi16() {
13693        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13694        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13695        let r = _mm_mask_min_epi16(a, 0, a, b);
13696        assert_eq_m128i(r, a);
13697        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
13698        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
13699        assert_eq_m128i(r, e);
13700    }
13701
13702    #[simd_test(enable = "avx512f,avx512vl")]
13703    unsafe fn test_mm_maskz_min_epi16() {
13704        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
13705        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
13706        let r = _mm_maskz_min_epi16(0, a, b);
13707        assert_eq_m128i(r, _mm_setzero_si128());
13708        let r = _mm_maskz_min_epi16(0b00001111, a, b);
13709        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
13710        assert_eq_m128i(r, e);
13711    }
13712
13713    #[simd_test(enable = "avx512bw")]
13714    unsafe fn test_mm512_min_epi8() {
13715        #[rustfmt::skip]
13716        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13717                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13718                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13719                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13720        #[rustfmt::skip]
13721        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13722                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13723                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13724                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13725        let r = _mm512_min_epi8(a, b);
13726        #[rustfmt::skip]
13727        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13728                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13729                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13730                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13731        assert_eq_m512i(r, e);
13732    }
13733
13734    #[simd_test(enable = "avx512f")]
13735    unsafe fn test_mm512_mask_min_epi8() {
13736        #[rustfmt::skip]
13737        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13738                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13739                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13740                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13741        #[rustfmt::skip]
13742        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13743                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13744                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13745                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13746        let r = _mm512_mask_min_epi8(a, 0, a, b);
13747        assert_eq_m512i(r, a);
13748        let r = _mm512_mask_min_epi8(
13749            a,
13750            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13751            a,
13752            b,
13753        );
13754        #[rustfmt::skip]
13755        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13756                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13757                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13758                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13759        assert_eq_m512i(r, e);
13760    }
13761
13762    #[simd_test(enable = "avx512f")]
13763    unsafe fn test_mm512_maskz_min_epi8() {
13764        #[rustfmt::skip]
13765        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13766                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13767                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13768                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13769        #[rustfmt::skip]
13770        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13771                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13772                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13773                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13774        let r = _mm512_maskz_min_epi8(0, a, b);
13775        assert_eq_m512i(r, _mm512_setzero_si512());
13776        let r = _mm512_maskz_min_epi8(
13777            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
13778            a,
13779            b,
13780        );
13781        #[rustfmt::skip]
13782        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13783                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13784                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13785                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13786        assert_eq_m512i(r, e);
13787    }
13788
13789    #[simd_test(enable = "avx512f,avx512vl")]
13790    unsafe fn test_mm256_mask_min_epi8() {
13791        #[rustfmt::skip]
13792        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13793                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13794        #[rustfmt::skip]
13795        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13796                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13797        let r = _mm256_mask_min_epi8(a, 0, a, b);
13798        assert_eq_m256i(r, a);
13799        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
13800        #[rustfmt::skip]
13801        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
13802                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13803        assert_eq_m256i(r, e);
13804    }
13805
13806    #[simd_test(enable = "avx512f,avx512vl")]
13807    unsafe fn test_mm256_maskz_min_epi8() {
13808        #[rustfmt::skip]
13809        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
13810                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13811        #[rustfmt::skip]
13812        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
13813                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13814        let r = _mm256_maskz_min_epi8(0, a, b);
13815        assert_eq_m256i(r, _mm256_setzero_si256());
13816        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
13817        #[rustfmt::skip]
13818        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
13819                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13820        assert_eq_m256i(r, e);
13821    }
13822
13823    #[simd_test(enable = "avx512f,avx512vl")]
13824    unsafe fn test_mm_mask_min_epi8() {
13825        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13826        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13827        let r = _mm_mask_min_epi8(a, 0, a, b);
13828        assert_eq_m128i(r, a);
13829        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
13830        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13831        assert_eq_m128i(r, e);
13832    }
13833
13834    #[simd_test(enable = "avx512f,avx512vl")]
13835    unsafe fn test_mm_maskz_min_epi8() {
13836        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13837        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13838        let r = _mm_maskz_min_epi8(0, a, b);
13839        assert_eq_m128i(r, _mm_setzero_si128());
13840        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
13841        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
13842        assert_eq_m128i(r, e);
13843    }
13844
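    // Unsigned less-than compares the raw two's-complement bit patterns, so
    // -2 (0xFFFE) is below -1 (0xFFFF) in every lane. The mask variants AND
    // the comparison result with the mask argument.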
13845    #[simd_test(enable = "avx512bw")]
13846    unsafe fn test_mm512_cmplt_epu16_mask() {
13847        let a = _mm512_set1_epi16(-2);
13848        let b = _mm512_set1_epi16(-1);
13849        let m = _mm512_cmplt_epu16_mask(a, b);
13850        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13851    }
13852
13853    #[simd_test(enable = "avx512bw")]
13854    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
13855        let a = _mm512_set1_epi16(-2);
13856        let b = _mm512_set1_epi16(-1);
13857        let mask = 0b01010101_01010101_01010101_01010101;
13858        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
13859        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13860    }
13861
13862    #[simd_test(enable = "avx512bw,avx512vl")]
13863    unsafe fn test_mm256_cmplt_epu16_mask() {
13864        let a = _mm256_set1_epi16(-2);
13865        let b = _mm256_set1_epi16(-1);
13866        let m = _mm256_cmplt_epu16_mask(a, b);
13867        assert_eq!(m, 0b11111111_11111111);
13868    }
13869
13870    #[simd_test(enable = "avx512bw,avx512vl")]
13871    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
13872        let a = _mm256_set1_epi16(-2);
13873        let b = _mm256_set1_epi16(-1);
13874        let mask = 0b01010101_01010101;
13875        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
13876        assert_eq!(r, 0b01010101_01010101);
13877    }
13878
13879    #[simd_test(enable = "avx512bw,avx512vl")]
13880    unsafe fn test_mm_cmplt_epu16_mask() {
13881        let a = _mm_set1_epi16(-2);
13882        let b = _mm_set1_epi16(-1);
13883        let m = _mm_cmplt_epu16_mask(a, b);
13884        assert_eq!(m, 0b11111111);
13885    }
13886
13887    #[simd_test(enable = "avx512bw,avx512vl")]
13888    unsafe fn test_mm_mask_cmplt_epu16_mask() {
13889        let a = _mm_set1_epi16(-2);
13890        let b = _mm_set1_epi16(-1);
13891        let mask = 0b01010101;
13892        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
13893        assert_eq!(r, 0b01010101);
13894    }
13895
13896    #[simd_test(enable = "avx512bw")]
13897    unsafe fn test_mm512_cmplt_epu8_mask() {
13898        let a = _mm512_set1_epi8(-2);
13899        let b = _mm512_set1_epi8(-1);
13900        let m = _mm512_cmplt_epu8_mask(a, b);
13901        assert_eq!(
13902            m,
13903            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13904        );
13905    }
13906
13907    #[simd_test(enable = "avx512bw")]
13908    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
13909        let a = _mm512_set1_epi8(-2);
13910        let b = _mm512_set1_epi8(-1);
13911        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13912        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
13913        assert_eq!(
13914            r,
13915            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13916        );
13917    }
13918
13919    #[simd_test(enable = "avx512bw,avx512vl")]
13920    unsafe fn test_mm256_cmplt_epu8_mask() {
13921        let a = _mm256_set1_epi8(-2);
13922        let b = _mm256_set1_epi8(-1);
13923        let m = _mm256_cmplt_epu8_mask(a, b);
13924        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13925    }
13926
13927    #[simd_test(enable = "avx512bw,avx512vl")]
13928    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
13929        let a = _mm256_set1_epi8(-2);
13930        let b = _mm256_set1_epi8(-1);
13931        let mask = 0b01010101_01010101_01010101_01010101;
13932        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
13933        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13934    }
13935
13936    #[simd_test(enable = "avx512bw,avx512vl")]
13937    unsafe fn test_mm_cmplt_epu8_mask() {
13938        let a = _mm_set1_epi8(-2);
13939        let b = _mm_set1_epi8(-1);
13940        let m = _mm_cmplt_epu8_mask(a, b);
13941        assert_eq!(m, 0b11111111_11111111);
13942    }
13943
13944    #[simd_test(enable = "avx512bw,avx512vl")]
13945    unsafe fn test_mm_mask_cmplt_epu8_mask() {
13946        let a = _mm_set1_epi8(-2);
13947        let b = _mm_set1_epi8(-1);
13948        let mask = 0b01010101_01010101;
13949        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
13950        assert_eq!(r, 0b01010101_01010101);
13951    }
13952
13953    #[simd_test(enable = "avx512bw")]
13954    unsafe fn test_mm512_cmplt_epi16_mask() {
13955        let a = _mm512_set1_epi16(-2);
13956        let b = _mm512_set1_epi16(-1);
13957        let m = _mm512_cmplt_epi16_mask(a, b);
13958        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13959    }
13960
13961    #[simd_test(enable = "avx512bw")]
13962    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
13963        let a = _mm512_set1_epi16(-2);
13964        let b = _mm512_set1_epi16(-1);
13965        let mask = 0b01010101_01010101_01010101_01010101;
13966        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
13967        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13968    }
13969
13970    #[simd_test(enable = "avx512bw,avx512vl")]
13971    unsafe fn test_mm256_cmplt_epi16_mask() {
13972        let a = _mm256_set1_epi16(-2);
13973        let b = _mm256_set1_epi16(-1);
13974        let m = _mm256_cmplt_epi16_mask(a, b);
13975        assert_eq!(m, 0b11111111_11111111);
13976    }
13977
13978    #[simd_test(enable = "avx512bw,avx512vl")]
13979    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
13980        let a = _mm256_set1_epi16(-2);
13981        let b = _mm256_set1_epi16(-1);
13982        let mask = 0b01010101_01010101;
13983        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
13984        assert_eq!(r, 0b01010101_01010101);
13985    }
13986
13987    #[simd_test(enable = "avx512bw,avx512vl")]
13988    unsafe fn test_mm_cmplt_epi16_mask() {
13989        let a = _mm_set1_epi16(-2);
13990        let b = _mm_set1_epi16(-1);
13991        let m = _mm_cmplt_epi16_mask(a, b);
13992        assert_eq!(m, 0b11111111);
13993    }
13994
13995    #[simd_test(enable = "avx512bw,avx512vl")]
13996    unsafe fn test_mm_mask_cmplt_epi16_mask() {
13997        let a = _mm_set1_epi16(-2);
13998        let b = _mm_set1_epi16(-1);
13999        let mask = 0b01010101;
14000        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
14001        assert_eq!(r, 0b01010101);
14002    }
14003
14004    #[simd_test(enable = "avx512bw")]
14005    unsafe fn test_mm512_cmplt_epi8_mask() {
14006        let a = _mm512_set1_epi8(-2);
14007        let b = _mm512_set1_epi8(-1);
14008        let m = _mm512_cmplt_epi8_mask(a, b);
14009        assert_eq!(
14010            m,
14011            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14012        );
14013    }
14014
14015    #[simd_test(enable = "avx512bw")]
14016    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
14017        let a = _mm512_set1_epi8(-2);
14018        let b = _mm512_set1_epi8(-1);
14019        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14020        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
14021        assert_eq!(
14022            r,
14023            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14024        );
14025    }
14026
14027    #[simd_test(enable = "avx512bw,avx512vl")]
14028    unsafe fn test_mm256_cmplt_epi8_mask() {
14029        let a = _mm256_set1_epi8(-2);
14030        let b = _mm256_set1_epi8(-1);
14031        let m = _mm256_cmplt_epi8_mask(a, b);
14032        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14033    }
14034
14035    #[simd_test(enable = "avx512bw,avx512vl")]
14036    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
14037        let a = _mm256_set1_epi8(-2);
14038        let b = _mm256_set1_epi8(-1);
14039        let mask = 0b01010101_01010101_01010101_01010101;
14040        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
14041        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14042    }
14043
14044    #[simd_test(enable = "avx512bw,avx512vl")]
14045    unsafe fn test_mm_cmplt_epi8_mask() {
14046        let a = _mm_set1_epi8(-2);
14047        let b = _mm_set1_epi8(-1);
14048        let m = _mm_cmplt_epi8_mask(a, b);
14049        assert_eq!(m, 0b11111111_11111111);
14050    }
14051
14052    #[simd_test(enable = "avx512bw,avx512vl")]
14053    unsafe fn test_mm_mask_cmplt_epi8_mask() {
14054        let a = _mm_set1_epi8(-2);
14055        let b = _mm_set1_epi8(-1);
14056        let mask = 0b01010101_01010101;
14057        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
14058        assert_eq!(r, 0b01010101_01010101);
14059    }
14060
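    // Unsigned greater-than: 2 > 1 holds in every lane, so the full mask is
    // set. The signed (epi) tests further down use 2 > -1 instead, which
    // only holds because the comparison is signed.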
14061    #[simd_test(enable = "avx512bw")]
14062    unsafe fn test_mm512_cmpgt_epu16_mask() {
14063        let a = _mm512_set1_epi16(2);
14064        let b = _mm512_set1_epi16(1);
14065        let m = _mm512_cmpgt_epu16_mask(a, b);
14066        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14067    }
14068
14069    #[simd_test(enable = "avx512bw")]
14070    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
14071        let a = _mm512_set1_epi16(2);
14072        let b = _mm512_set1_epi16(1);
14073        let mask = 0b01010101_01010101_01010101_01010101;
14074        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
14075        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14076    }
14077
14078    #[simd_test(enable = "avx512bw,avx512vl")]
14079    unsafe fn test_mm256_cmpgt_epu16_mask() {
14080        let a = _mm256_set1_epi16(2);
14081        let b = _mm256_set1_epi16(1);
14082        let m = _mm256_cmpgt_epu16_mask(a, b);
14083        assert_eq!(m, 0b11111111_11111111);
14084    }
14085
14086    #[simd_test(enable = "avx512bw,avx512vl")]
14087    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
14088        let a = _mm256_set1_epi16(2);
14089        let b = _mm256_set1_epi16(1);
14090        let mask = 0b01010101_01010101;
14091        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
14092        assert_eq!(r, 0b01010101_01010101);
14093    }
14094
14095    #[simd_test(enable = "avx512bw,avx512vl")]
14096    unsafe fn test_mm_cmpgt_epu16_mask() {
14097        let a = _mm_set1_epi16(2);
14098        let b = _mm_set1_epi16(1);
14099        let m = _mm_cmpgt_epu16_mask(a, b);
14100        assert_eq!(m, 0b11111111);
14101    }
14102
14103    #[simd_test(enable = "avx512bw,avx512vl")]
14104    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
14105        let a = _mm_set1_epi16(2);
14106        let b = _mm_set1_epi16(1);
14107        let mask = 0b01010101;
14108        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
14109        assert_eq!(r, 0b01010101);
14110    }
14111
14112    #[simd_test(enable = "avx512bw")]
14113    unsafe fn test_mm512_cmpgt_epu8_mask() {
14114        let a = _mm512_set1_epi8(2);
14115        let b = _mm512_set1_epi8(1);
14116        let m = _mm512_cmpgt_epu8_mask(a, b);
14117        assert_eq!(
14118            m,
14119            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14120        );
14121    }
14122
14123    #[simd_test(enable = "avx512bw")]
14124    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
14125        let a = _mm512_set1_epi8(2);
14126        let b = _mm512_set1_epi8(1);
14127        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14128        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
14129        assert_eq!(
14130            r,
14131            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14132        );
14133    }
14134
14135    #[simd_test(enable = "avx512bw,avx512vl")]
14136    unsafe fn test_mm256_cmpgt_epu8_mask() {
14137        let a = _mm256_set1_epi8(2);
14138        let b = _mm256_set1_epi8(1);
14139        let m = _mm256_cmpgt_epu8_mask(a, b);
14140        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14141    }
14142
14143    #[simd_test(enable = "avx512bw,avx512vl")]
14144    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
14145        let a = _mm256_set1_epi8(2);
14146        let b = _mm256_set1_epi8(1);
14147        let mask = 0b01010101_01010101_01010101_01010101;
14148        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
14149        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14150    }
14151
14152    #[simd_test(enable = "avx512bw,avx512vl")]
14153    unsafe fn test_mm_cmpgt_epu8_mask() {
14154        let a = _mm_set1_epi8(2);
14155        let b = _mm_set1_epi8(1);
14156        let m = _mm_cmpgt_epu8_mask(a, b);
14157        assert_eq!(m, 0b11111111_11111111);
14158    }
14159
14160    #[simd_test(enable = "avx512bw,avx512vl")]
14161    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
14162        let a = _mm_set1_epi8(2);
14163        let b = _mm_set1_epi8(1);
14164        let mask = 0b01010101_01010101;
14165        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
14166        assert_eq!(r, 0b01010101_01010101);
14167    }
14168
14169    #[simd_test(enable = "avx512bw")]
14170    unsafe fn test_mm512_cmpgt_epi16_mask() {
14171        let a = _mm512_set1_epi16(2);
14172        let b = _mm512_set1_epi16(-1);
14173        let m = _mm512_cmpgt_epi16_mask(a, b);
14174        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14175    }
14176
14177    #[simd_test(enable = "avx512bw")]
14178    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
14179        let a = _mm512_set1_epi16(2);
14180        let b = _mm512_set1_epi16(-1);
14181        let mask = 0b01010101_01010101_01010101_01010101;
14182        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
14183        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14184    }
14185
14186    #[simd_test(enable = "avx512bw,avx512vl")]
14187    unsafe fn test_mm256_cmpgt_epi16_mask() {
14188        let a = _mm256_set1_epi16(2);
14189        let b = _mm256_set1_epi16(-1);
14190        let m = _mm256_cmpgt_epi16_mask(a, b);
14191        assert_eq!(m, 0b11111111_11111111);
14192    }
14193
14194    #[simd_test(enable = "avx512bw,avx512vl")]
14195    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
14196        let a = _mm256_set1_epi16(2);
14197        let b = _mm256_set1_epi16(-1);
14198        let mask = 0b01010101_01010101;
14199        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
14200        assert_eq!(r, 0b01010101_01010101);
14201    }
14202
14203    #[simd_test(enable = "avx512bw,avx512vl")]
14204    unsafe fn test_mm_cmpgt_epi16_mask() {
14205        let a = _mm_set1_epi16(2);
14206        let b = _mm_set1_epi16(-1);
14207        let m = _mm_cmpgt_epi16_mask(a, b);
14208        assert_eq!(m, 0b11111111);
14209    }
14210
14211    #[simd_test(enable = "avx512bw,avx512vl")]
14212    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
14213        let a = _mm_set1_epi16(2);
14214        let b = _mm_set1_epi16(-1);
14215        let mask = 0b01010101;
14216        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
14217        assert_eq!(r, 0b01010101);
14218    }
14219
14220    #[simd_test(enable = "avx512bw")]
14221    unsafe fn test_mm512_cmpgt_epi8_mask() {
14222        let a = _mm512_set1_epi8(2);
14223        let b = _mm512_set1_epi8(-1);
14224        let m = _mm512_cmpgt_epi8_mask(a, b);
14225        assert_eq!(
14226            m,
14227            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14228        );
14229    }
14230
14231    #[simd_test(enable = "avx512bw")]
14232    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
14233        let a = _mm512_set1_epi8(2);
14234        let b = _mm512_set1_epi8(-1);
14235        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14236        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
14237        assert_eq!(
14238            r,
14239            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14240        );
14241    }
14242
14243    #[simd_test(enable = "avx512bw,avx512vl")]
14244    unsafe fn test_mm256_cmpgt_epi8_mask() {
14245        let a = _mm256_set1_epi8(2);
14246        let b = _mm256_set1_epi8(-1);
14247        let m = _mm256_cmpgt_epi8_mask(a, b);
14248        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14249    }
14250
14251    #[simd_test(enable = "avx512bw,avx512vl")]
14252    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
14253        let a = _mm256_set1_epi8(2);
14254        let b = _mm256_set1_epi8(-1);
14255        let mask = 0b01010101_01010101_01010101_01010101;
14256        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
14257        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14258    }
14259
14260    #[simd_test(enable = "avx512bw,avx512vl")]
14261    unsafe fn test_mm_cmpgt_epi8_mask() {
14262        let a = _mm_set1_epi8(2);
14263        let b = _mm_set1_epi8(-1);
14264        let m = _mm_cmpgt_epi8_mask(a, b);
14265        assert_eq!(m, 0b11111111_11111111);
14266    }
14267
14268    #[simd_test(enable = "avx512bw,avx512vl")]
14269    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
14270        let a = _mm_set1_epi8(2);
14271        let b = _mm_set1_epi8(-1);
14272        let mask = 0b01010101_01010101;
14273        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
14274        assert_eq!(r, 0b01010101_01010101);
14275    }
14276
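    // Less-than-or-equal with equal operands: every lane satisfies a <= b, so
    // the unmasked result is all ones and the masked result equals the mask.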
14277    #[simd_test(enable = "avx512bw")]
14278    unsafe fn test_mm512_cmple_epu16_mask() {
14279        let a = _mm512_set1_epi16(-1);
14280        let b = _mm512_set1_epi16(-1);
14281        let m = _mm512_cmple_epu16_mask(a, b);
14282        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14283    }
14284
14285    #[simd_test(enable = "avx512bw")]
14286    unsafe fn test_mm512_mask_cmple_epu16_mask() {
14287        let a = _mm512_set1_epi16(-1);
14288        let b = _mm512_set1_epi16(-1);
14289        let mask = 0b01010101_01010101_01010101_01010101;
14290        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
14291        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14292    }
14293
14294    #[simd_test(enable = "avx512bw,avx512vl")]
14295    unsafe fn test_mm256_cmple_epu16_mask() {
14296        let a = _mm256_set1_epi16(-1);
14297        let b = _mm256_set1_epi16(-1);
14298        let m = _mm256_cmple_epu16_mask(a, b);
14299        assert_eq!(m, 0b11111111_11111111);
14300    }
14301
14302    #[simd_test(enable = "avx512bw,avx512vl")]
14303    unsafe fn test_mm256_mask_cmple_epu16_mask() {
14304        let a = _mm256_set1_epi16(-1);
14305        let b = _mm256_set1_epi16(-1);
14306        let mask = 0b01010101_01010101;
14307        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
14308        assert_eq!(r, 0b01010101_01010101);
14309    }
14310
14311    #[simd_test(enable = "avx512bw,avx512vl")]
14312    unsafe fn test_mm_cmple_epu16_mask() {
14313        let a = _mm_set1_epi16(-1);
14314        let b = _mm_set1_epi16(-1);
14315        let m = _mm_cmple_epu16_mask(a, b);
14316        assert_eq!(m, 0b11111111);
14317    }
14318
14319    #[simd_test(enable = "avx512bw,avx512vl")]
14320    unsafe fn test_mm_mask_cmple_epu16_mask() {
14321        let a = _mm_set1_epi16(-1);
14322        let b = _mm_set1_epi16(-1);
14323        let mask = 0b01010101;
14324        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
14325        assert_eq!(r, 0b01010101);
14326    }
14327
14328    #[simd_test(enable = "avx512bw")]
14329    unsafe fn test_mm512_cmple_epu8_mask() {
14330        let a = _mm512_set1_epi8(-1);
14331        let b = _mm512_set1_epi8(-1);
14332        let m = _mm512_cmple_epu8_mask(a, b);
14333        assert_eq!(
14334            m,
14335            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14336        );
14337    }
14338
14339    #[simd_test(enable = "avx512bw")]
14340    unsafe fn test_mm512_mask_cmple_epu8_mask() {
14341        let a = _mm512_set1_epi8(-1);
14342        let b = _mm512_set1_epi8(-1);
14343        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14344        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
14345        assert_eq!(
14346            r,
14347            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14348        );
14349    }
14350
14351    #[simd_test(enable = "avx512bw,avx512vl")]
14352    unsafe fn test_mm256_cmple_epu8_mask() {
14353        let a = _mm256_set1_epi8(-1);
14354        let b = _mm256_set1_epi8(-1);
14355        let m = _mm256_cmple_epu8_mask(a, b);
14356        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14357    }
14358
14359    #[simd_test(enable = "avx512bw,avx512vl")]
14360    unsafe fn test_mm256_mask_cmple_epu8_mask() {
14361        let a = _mm256_set1_epi8(-1);
14362        let b = _mm256_set1_epi8(-1);
14363        let mask = 0b01010101_01010101_01010101_01010101;
14364        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
14365        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14366    }
14367
14368    #[simd_test(enable = "avx512bw,avx512vl")]
14369    unsafe fn test_mm_cmple_epu8_mask() {
14370        let a = _mm_set1_epi8(-1);
14371        let b = _mm_set1_epi8(-1);
14372        let m = _mm_cmple_epu8_mask(a, b);
14373        assert_eq!(m, 0b11111111_11111111);
14374    }
14375
14376    #[simd_test(enable = "avx512bw,avx512vl")]
14377    unsafe fn test_mm_mask_cmple_epu8_mask() {
14378        let a = _mm_set1_epi8(-1);
14379        let b = _mm_set1_epi8(-1);
14380        let mask = 0b01010101_01010101;
14381        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
14382        assert_eq!(r, 0b01010101_01010101);
14383    }
14384
14385    #[simd_test(enable = "avx512bw")]
14386    unsafe fn test_mm512_cmple_epi16_mask() {
14387        let a = _mm512_set1_epi16(-1);
14388        let b = _mm512_set1_epi16(-1);
14389        let m = _mm512_cmple_epi16_mask(a, b);
14390        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14391    }
14392
14393    #[simd_test(enable = "avx512bw")]
14394    unsafe fn test_mm512_mask_cmple_epi16_mask() {
14395        let a = _mm512_set1_epi16(-1);
14396        let b = _mm512_set1_epi16(-1);
14397        let mask = 0b01010101_01010101_01010101_01010101;
14398        let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
14399        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14400    }
14401
14402    #[simd_test(enable = "avx512bw,avx512vl")]
14403    unsafe fn test_mm256_cmple_epi16_mask() {
14404        let a = _mm256_set1_epi16(-1);
14405        let b = _mm256_set1_epi16(-1);
14406        let m = _mm256_cmple_epi16_mask(a, b);
14407        assert_eq!(m, 0b11111111_11111111);
14408    }
14409
14410    #[simd_test(enable = "avx512bw,avx512vl")]
14411    unsafe fn test_mm256_mask_cmple_epi16_mask() {
14412        let a = _mm256_set1_epi16(-1);
14413        let b = _mm256_set1_epi16(-1);
14414        let mask = 0b01010101_01010101;
14415        let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
14416        assert_eq!(r, 0b01010101_01010101);
14417    }
14418
14419    #[simd_test(enable = "avx512bw,avx512vl")]
14420    unsafe fn test_mm_cmple_epi16_mask() {
14421        let a = _mm_set1_epi16(-1);
14422        let b = _mm_set1_epi16(-1);
14423        let m = _mm_cmple_epi16_mask(a, b);
14424        assert_eq!(m, 0b11111111);
14425    }
14426
14427    #[simd_test(enable = "avx512bw,avx512vl")]
14428    unsafe fn test_mm_mask_cmple_epi16_mask() {
14429        let a = _mm_set1_epi16(-1);
14430        let b = _mm_set1_epi16(-1);
14431        let mask = 0b01010101;
14432        let r = _mm_mask_cmple_epi16_mask(mask, a, b);
14433        assert_eq!(r, 0b01010101);
14434    }
14435
14436    #[simd_test(enable = "avx512bw")]
14437    unsafe fn test_mm512_cmple_epi8_mask() {
14438        let a = _mm512_set1_epi8(-1);
14439        let b = _mm512_set1_epi8(-1);
14440        let m = _mm512_cmple_epi8_mask(a, b);
14441        assert_eq!(
14442            m,
14443            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14444        );
14445    }
14446
14447    #[simd_test(enable = "avx512bw")]
14448    unsafe fn test_mm512_mask_cmple_epi8_mask() {
14449        let a = _mm512_set1_epi8(-1);
14450        let b = _mm512_set1_epi8(-1);
14451        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14452        let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
14453        assert_eq!(
14454            r,
14455            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14456        );
14457    }
14458
14459    #[simd_test(enable = "avx512bw,avx512vl")]
14460    unsafe fn test_mm256_cmple_epi8_mask() {
14461        let a = _mm256_set1_epi8(-1);
14462        let b = _mm256_set1_epi8(-1);
14463        let m = _mm256_cmple_epi8_mask(a, b);
14464        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14465    }
14466
14467    #[simd_test(enable = "avx512bw,avx512vl")]
14468    unsafe fn test_mm256_mask_cmple_epi8_mask() {
14469        let a = _mm256_set1_epi8(-1);
14470        let b = _mm256_set1_epi8(-1);
14471        let mask = 0b01010101_01010101_01010101_01010101;
14472        let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
14473        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14474    }
14475
14476    #[simd_test(enable = "avx512bw,avx512vl")]
14477    unsafe fn test_mm_cmple_epi8_mask() {
14478        let a = _mm_set1_epi8(-1);
14479        let b = _mm_set1_epi8(-1);
14480        let m = _mm_cmple_epi8_mask(a, b);
14481        assert_eq!(m, 0b11111111_11111111);
14482    }
14483
14484    #[simd_test(enable = "avx512bw,avx512vl")]
14485    unsafe fn test_mm_mask_cmple_epi8_mask() {
14486        let a = _mm_set1_epi8(-1);
14487        let b = _mm_set1_epi8(-1);
14488        let mask = 0b01010101_01010101;
14489        let r = _mm_mask_cmple_epi8_mask(mask, a, b);
14490        assert_eq!(r, 0b01010101_01010101);
14491    }
14492
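    // Greater-than-or-equal with equal operands: all lanes compare true, both
    // for the unsigned (epu) variants here and the signed (epi) ones below.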
14493    #[simd_test(enable = "avx512bw")]
14494    unsafe fn test_mm512_cmpge_epu16_mask() {
14495        let a = _mm512_set1_epi16(1);
14496        let b = _mm512_set1_epi16(1);
14497        let m = _mm512_cmpge_epu16_mask(a, b);
14498        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14499    }
14500
14501    #[simd_test(enable = "avx512bw")]
14502    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
14503        let a = _mm512_set1_epi16(1);
14504        let b = _mm512_set1_epi16(1);
14505        let mask = 0b01010101_01010101_01010101_01010101;
14506        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
14507        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14508    }
14509
14510    #[simd_test(enable = "avx512bw,avx512vl")]
14511    unsafe fn test_mm256_cmpge_epu16_mask() {
14512        let a = _mm256_set1_epi16(1);
14513        let b = _mm256_set1_epi16(1);
14514        let m = _mm256_cmpge_epu16_mask(a, b);
14515        assert_eq!(m, 0b11111111_11111111);
14516    }
14517
14518    #[simd_test(enable = "avx512bw,avx512vl")]
14519    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
14520        let a = _mm256_set1_epi16(1);
14521        let b = _mm256_set1_epi16(1);
14522        let mask = 0b01010101_01010101;
14523        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
14524        assert_eq!(r, 0b01010101_01010101);
14525    }
14526
14527    #[simd_test(enable = "avx512bw,avx512vl")]
14528    unsafe fn test_mm_cmpge_epu16_mask() {
14529        let a = _mm_set1_epi16(1);
14530        let b = _mm_set1_epi16(1);
14531        let m = _mm_cmpge_epu16_mask(a, b);
14532        assert_eq!(m, 0b11111111);
14533    }
14534
14535    #[simd_test(enable = "avx512bw,avx512vl")]
14536    unsafe fn test_mm_mask_cmpge_epu16_mask() {
14537        let a = _mm_set1_epi16(1);
14538        let b = _mm_set1_epi16(1);
14539        let mask = 0b01010101;
14540        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
14541        assert_eq!(r, 0b01010101);
14542    }
14543
14544    #[simd_test(enable = "avx512bw")]
14545    unsafe fn test_mm512_cmpge_epu8_mask() {
14546        let a = _mm512_set1_epi8(1);
14547        let b = _mm512_set1_epi8(1);
14548        let m = _mm512_cmpge_epu8_mask(a, b);
14549        assert_eq!(
14550            m,
14551            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14552        );
14553    }
14554
14555    #[simd_test(enable = "avx512bw")]
14556    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
14557        let a = _mm512_set1_epi8(1);
14558        let b = _mm512_set1_epi8(1);
14559        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14560        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
14561        assert_eq!(
14562            r,
14563            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14564        );
14565    }
14566
14567    #[simd_test(enable = "avx512bw,avx512vl")]
14568    unsafe fn test_mm256_cmpge_epu8_mask() {
14569        let a = _mm256_set1_epi8(1);
14570        let b = _mm256_set1_epi8(1);
14571        let m = _mm256_cmpge_epu8_mask(a, b);
14572        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14573    }
14574
14575    #[simd_test(enable = "avx512bw,avx512vl")]
14576    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
14577        let a = _mm256_set1_epi8(1);
14578        let b = _mm256_set1_epi8(1);
14579        let mask = 0b01010101_01010101_01010101_01010101;
14580        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
14581        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14582    }
14583
14584    #[simd_test(enable = "avx512bw,avx512vl")]
14585    unsafe fn test_mm_cmpge_epu8_mask() {
14586        let a = _mm_set1_epi8(1);
14587        let b = _mm_set1_epi8(1);
14588        let m = _mm_cmpge_epu8_mask(a, b);
14589        assert_eq!(m, 0b11111111_11111111);
14590    }
14591
14592    #[simd_test(enable = "avx512bw,avx512vl")]
14593    unsafe fn test_mm_mask_cmpge_epu8_mask() {
14594        let a = _mm_set1_epi8(1);
14595        let b = _mm_set1_epi8(1);
14596        let mask = 0b01010101_01010101;
14597        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
14598        assert_eq!(r, 0b01010101_01010101);
14599    }
14600
14601    #[simd_test(enable = "avx512bw")]
14602    unsafe fn test_mm512_cmpge_epi16_mask() {
14603        let a = _mm512_set1_epi16(-1);
14604        let b = _mm512_set1_epi16(-1);
14605        let m = _mm512_cmpge_epi16_mask(a, b);
14606        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14607    }
14608
14609    #[simd_test(enable = "avx512bw")]
14610    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
14611        let a = _mm512_set1_epi16(-1);
14612        let b = _mm512_set1_epi16(-1);
14613        let mask = 0b01010101_01010101_01010101_01010101;
14614        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
14615        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14616    }
14617
14618    #[simd_test(enable = "avx512bw,avx512vl")]
14619    unsafe fn test_mm256_cmpge_epi16_mask() {
14620        let a = _mm256_set1_epi16(-1);
14621        let b = _mm256_set1_epi16(-1);
14622        let m = _mm256_cmpge_epi16_mask(a, b);
14623        assert_eq!(m, 0b11111111_11111111);
14624    }
14625
14626    #[simd_test(enable = "avx512bw,avx512vl")]
14627    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
14628        let a = _mm256_set1_epi16(-1);
14629        let b = _mm256_set1_epi16(-1);
14630        let mask = 0b01010101_01010101;
14631        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
14632        assert_eq!(r, 0b01010101_01010101);
14633    }
14634
14635    #[simd_test(enable = "avx512bw,avx512vl")]
14636    unsafe fn test_mm_cmpge_epi16_mask() {
14637        let a = _mm_set1_epi16(-1);
14638        let b = _mm_set1_epi16(-1);
14639        let m = _mm_cmpge_epi16_mask(a, b);
14640        assert_eq!(m, 0b11111111);
14641    }
14642
14643    #[simd_test(enable = "avx512bw,avx512vl")]
14644    unsafe fn test_mm_mask_cmpge_epi16_mask() {
14645        let a = _mm_set1_epi16(-1);
14646        let b = _mm_set1_epi16(-1);
14647        let mask = 0b01010101;
14648        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
14649        assert_eq!(r, 0b01010101);
14650    }
14651
14652    #[simd_test(enable = "avx512bw")]
14653    unsafe fn test_mm512_cmpge_epi8_mask() {
14654        let a = _mm512_set1_epi8(-1);
14655        let b = _mm512_set1_epi8(-1);
14656        let m = _mm512_cmpge_epi8_mask(a, b);
14657        assert_eq!(
14658            m,
14659            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14660        );
14661    }
14662
14663    #[simd_test(enable = "avx512bw")]
14664    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
14665        let a = _mm512_set1_epi8(-1);
14666        let b = _mm512_set1_epi8(-1);
14667        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14668        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
14669        assert_eq!(
14670            r,
14671            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14672        );
14673    }
14674
14675    #[simd_test(enable = "avx512bw,avx512vl")]
14676    unsafe fn test_mm256_cmpge_epi8_mask() {
14677        let a = _mm256_set1_epi8(-1);
14678        let b = _mm256_set1_epi8(-1);
14679        let m = _mm256_cmpge_epi8_mask(a, b);
14680        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14681    }
14682
14683    #[simd_test(enable = "avx512bw,avx512vl")]
14684    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
14685        let a = _mm256_set1_epi8(-1);
14686        let b = _mm256_set1_epi8(-1);
14687        let mask = 0b01010101_01010101_01010101_01010101;
14688        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
14689        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14690    }
14691
14692    #[simd_test(enable = "avx512bw,avx512vl")]
14693    unsafe fn test_mm_cmpge_epi8_mask() {
14694        let a = _mm_set1_epi8(-1);
14695        let b = _mm_set1_epi8(-1);
14696        let m = _mm_cmpge_epi8_mask(a, b);
14697        assert_eq!(m, 0b11111111_11111111);
14698    }
14699
14700    #[simd_test(enable = "avx512bw,avx512vl")]
14701    unsafe fn test_mm_mask_cmpge_epi8_mask() {
14702        let a = _mm_set1_epi8(-1);
14703        let b = _mm_set1_epi8(-1);
14704        let mask = 0b01010101_01010101;
14705        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
14706        assert_eq!(r, 0b01010101_01010101);
14707    }
14708
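    // Equality compares behave identically for signed and unsigned lanes;
    // with identical inputs every bit of the result mask is set before masking.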
14709    #[simd_test(enable = "avx512bw")]
14710    unsafe fn test_mm512_cmpeq_epu16_mask() {
14711        let a = _mm512_set1_epi16(1);
14712        let b = _mm512_set1_epi16(1);
14713        let m = _mm512_cmpeq_epu16_mask(a, b);
14714        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14715    }
14716
14717    #[simd_test(enable = "avx512bw")]
14718    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
14719        let a = _mm512_set1_epi16(1);
14720        let b = _mm512_set1_epi16(1);
14721        let mask = 0b01010101_01010101_01010101_01010101;
14722        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
14723        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14724    }
14725
14726    #[simd_test(enable = "avx512bw,avx512vl")]
14727    unsafe fn test_mm256_cmpeq_epu16_mask() {
14728        let a = _mm256_set1_epi16(1);
14729        let b = _mm256_set1_epi16(1);
14730        let m = _mm256_cmpeq_epu16_mask(a, b);
14731        assert_eq!(m, 0b11111111_11111111);
14732    }
14733
14734    #[simd_test(enable = "avx512bw,avx512vl")]
14735    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
14736        let a = _mm256_set1_epi16(1);
14737        let b = _mm256_set1_epi16(1);
14738        let mask = 0b01010101_01010101;
14739        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
14740        assert_eq!(r, 0b01010101_01010101);
14741    }
14742
14743    #[simd_test(enable = "avx512bw,avx512vl")]
14744    unsafe fn test_mm_cmpeq_epu16_mask() {
14745        let a = _mm_set1_epi16(1);
14746        let b = _mm_set1_epi16(1);
14747        let m = _mm_cmpeq_epu16_mask(a, b);
14748        assert_eq!(m, 0b11111111);
14749    }
14750
14751    #[simd_test(enable = "avx512bw,avx512vl")]
14752    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
14753        let a = _mm_set1_epi16(1);
14754        let b = _mm_set1_epi16(1);
14755        let mask = 0b01010101;
14756        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
14757        assert_eq!(r, 0b01010101);
14758    }
14759
14760    #[simd_test(enable = "avx512bw")]
14761    unsafe fn test_mm512_cmpeq_epu8_mask() {
14762        let a = _mm512_set1_epi8(1);
14763        let b = _mm512_set1_epi8(1);
14764        let m = _mm512_cmpeq_epu8_mask(a, b);
14765        assert_eq!(
14766            m,
14767            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
14768        );
14769    }
14770
14771    #[simd_test(enable = "avx512bw")]
14772    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
14773        let a = _mm512_set1_epi8(1);
14774        let b = _mm512_set1_epi8(1);
14775        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
14776        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
14777        assert_eq!(
14778            r,
14779            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
14780        );
14781    }
14782
14783    #[simd_test(enable = "avx512bw,avx512vl")]
14784    unsafe fn test_mm256_cmpeq_epu8_mask() {
14785        let a = _mm256_set1_epi8(1);
14786        let b = _mm256_set1_epi8(1);
14787        let m = _mm256_cmpeq_epu8_mask(a, b);
14788        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14789    }
14790
14791    #[simd_test(enable = "avx512bw,avx512vl")]
14792    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
14793        let a = _mm256_set1_epi8(1);
14794        let b = _mm256_set1_epi8(1);
14795        let mask = 0b01010101_01010101_01010101_01010101;
14796        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
14797        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14798    }
14799
14800    #[simd_test(enable = "avx512bw,avx512vl")]
14801    unsafe fn test_mm_cmpeq_epu8_mask() {
14802        let a = _mm_set1_epi8(1);
14803        let b = _mm_set1_epi8(1);
14804        let m = _mm_cmpeq_epu8_mask(a, b);
14805        assert_eq!(m, 0b11111111_11111111);
14806    }
14807
14808    #[simd_test(enable = "avx512bw,avx512vl")]
14809    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
14810        let a = _mm_set1_epi8(1);
14811        let b = _mm_set1_epi8(1);
14812        let mask = 0b01010101_01010101;
14813        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
14814        assert_eq!(r, 0b01010101_01010101);
14815    }
14816
14817    #[simd_test(enable = "avx512bw")]
14818    unsafe fn test_mm512_cmpeq_epi16_mask() {
14819        let a = _mm512_set1_epi16(-1);
14820        let b = _mm512_set1_epi16(-1);
14821        let m = _mm512_cmpeq_epi16_mask(a, b);
14822        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14823    }
14824
14825    #[simd_test(enable = "avx512bw")]
14826    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
14827        let a = _mm512_set1_epi16(-1);
14828        let b = _mm512_set1_epi16(-1);
14829        let mask = 0b01010101_01010101_01010101_01010101;
14830        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
14831        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14832    }
14833
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpeq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpneq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpneq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

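    // `_mm*_cmp_ep*_mask` take the comparison predicate as a const generic
    // parameter. The `_MM_CMPINT_*` encodings are EQ = 0, LT = 1, LE = 2,
    // FALSE = 3, NE = 4, NLT = 5 (>=), and NLE = 6 (>), with TRUE = 7. The
    // tests below all use `_MM_CMPINT_LT`, so comparing 0 against 1 sets
    // every lane's mask bit.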
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

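    // The reduction helpers fold a whole vector into one scalar. The masked
    // variants effectively substitute the operation's identity element for
    // lanes whose mask bit is clear (0 for add/or, 1 for mul, all-ones for
    // and, the type's extremes for min/max), so summing set1(1) under a mask
    // yields the number of selected lanes.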
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_reduce_add_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_reduce_add_epi16(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi16() {
        let a = _mm_set1_epi16(1);
        let e = _mm_mask_reduce_add_epi16(0b11110000, a);
        assert_eq!(4, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_reduce_add_epi8(a);
        assert_eq!(32, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_reduce_add_epi8(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_add_epi8() {
        let a = _mm_set1_epi8(1);
        let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

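    // In the and/or tests below, half the lanes hold 1 and half hold 2:
    // 1 & 2 == 0 while 1 | 2 == 3, and masking away the 2s leaves 1.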
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_and_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_and_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_and_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_and_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi16() {
        let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        let e = _mm256_reduce_mul_epi16(a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi16() {
        let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        let e = _mm_reduce_mul_epi16(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_reduce_mul_epi8(a);
        assert_eq!(64, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_mul_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_mul_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_reduce_mul_epi8(a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_mul_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
        let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_max_epi16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_max_epi16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_max_epi8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_max_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_max_epi8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_max_epu16(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_max_epu16(a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_max_epu8(a);
        assert_eq!(31, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_max_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_max_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_max_epu8(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_reduce_min_epi16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epi8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: i8 = _mm256_mask_reduce_min_epi8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_reduce_min_epi8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_reduce_min_epu16(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_min_epu8() {
        let a = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let e: u8 = _mm256_mask_reduce_min_epu8(0b1111111111111111_0000000000000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_reduce_min_epu8(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi16() {
        let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi16(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi16() {
        let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi16(0b11110000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_reduce_or_epi8() {
        let a = _mm256_set_epi8(
            1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 2, 2,
        );
        let e = _mm256_mask_reduce_or_epi8(0b11111111_00000000_11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_reduce_or_epi8(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_reduce_or_epi8() {
        let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

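    // `loadu`/`storeu` perform unaligned loads and stores. Element 0 of a
    // vector corresponds to the lowest address, while the `_mm*_set_ep*`
    // helpers take the highest lane first, hence the reversed argument order
    // in the expected values below.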
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }

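    // In the masked load/store variants, mask bit `i` governs element `i`
    // (counting from the lowest address): `mask_loadu` keeps `src` in
    // unselected lanes, `maskz_loadu` zeroes them, and `mask_storeu` leaves
    // unselected memory untouched.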
    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi16() {
        let src = _mm512_set1_epi16(42);
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi16() {
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
        let e = &[
            0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi16() {
        let mut r = [42_i16; 32];
        let a = &[
            1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm512_loadu_epi16(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm512_loadu_epi16(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_loadu_epi8() {
        let src = _mm512_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let p = a.as_ptr();
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw")]
    unsafe fn test_mm512_mask_storeu_epi8() {
        let mut r = [42_i8; 64];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        ];
        let a = _mm512_loadu_epi8(a.as_ptr());
        let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
            45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
        ];
        let e = _mm512_loadu_epi8(e.as_ptr());
        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi16() {
        let src = _mm256_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi16() {
        let mut r = [42_i16; 16];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm256_loadu_epi16(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[
            42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm256_loadu_epi16(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_loadu_epi8() {
        let src = _mm256_set1_epi8(42);
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_loadu_epi8() {
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let p = a.as_ptr();
        let m = 0b10101010_11001100_11101000_11001010;
        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
        let e = &[
            0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
            26, 0, 28, 0, 30, 0, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_storeu_epi8() {
        let mut r = [42_i8; 32];
        let a = &[
            1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let a = _mm256_loadu_epi8(a.as_ptr());
        let m = 0b10101010_11001100_11101000_11001010;
        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
            23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
        ];
        let e = _mm256_loadu_epi8(e.as_ptr());
        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi16() {
        let src = _mm_set1_epi16(42);
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi16() {
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11001010;
        let r = _mm_maskz_loadu_epi16(m, black_box(p));
        let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi16() {
        let mut r = [42_i16; 8];
        let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
        let a = _mm_loadu_epi16(a.as_ptr());
        let m = 0b11001010;
        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
        let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
        let e = _mm_loadu_epi16(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_loadu_epi8() {
        let src = _mm_set1_epi8(42);
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_loadu_epi8() {
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm_maskz_loadu_epi8(m, black_box(p));
        let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
    unsafe fn test_mm_mask_storeu_epi8() {
        let mut r = [42_i8; 16];
        let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let a = _mm_loadu_epi8(a.as_ptr());
        let m = 0b11101000_11001010;
        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
        let e = &[
            42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
        ];
        let e = _mm_loadu_epi8(e.as_ptr());
        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
    }

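    // `madd_epi16` multiplies adjacent pairs of signed 16-bit lanes and sums
    // each pair into one 32-bit lane, so with all-ones inputs every result
    // lane is 1 * 1 + 1 * 1 = 2. The masks of the masked variants are
    // accordingly 32-bit-lane masks (16 bits for a 512-bit vector).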
16229    #[simd_test(enable = "avx512bw")]
16230    unsafe fn test_mm512_madd_epi16() {
16231        let a = _mm512_set1_epi16(1);
16232        let b = _mm512_set1_epi16(1);
16233        let r = _mm512_madd_epi16(a, b);
16234        let e = _mm512_set1_epi32(2);
16235        assert_eq_m512i(r, e);
16236    }
16237
16238    #[simd_test(enable = "avx512bw")]
16239    unsafe fn test_mm512_mask_madd_epi16() {
16240        let a = _mm512_set1_epi16(1);
16241        let b = _mm512_set1_epi16(1);
16242        let r = _mm512_mask_madd_epi16(a, 0, a, b);
16243        assert_eq_m512i(r, a);
16244        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
16245        let e = _mm512_set_epi32(
16246            1 << 16 | 1,
16247            1 << 16 | 1,
16248            1 << 16 | 1,
16249            1 << 16 | 1,
16250            1 << 16 | 1,
16251            1 << 16 | 1,
16252            1 << 16 | 1,
16253            1 << 16 | 1,
16254            1 << 16 | 1,
16255            1 << 16 | 1,
16256            1 << 16 | 1,
16257            1 << 16 | 1,
16258            2,
16259            2,
16260            2,
16261            2,
16262        );
16263        assert_eq_m512i(r, e);
16264    }
16265
16266    #[simd_test(enable = "avx512bw")]
16267    unsafe fn test_mm512_maskz_madd_epi16() {
16268        let a = _mm512_set1_epi16(1);
16269        let b = _mm512_set1_epi16(1);
16270        let r = _mm512_maskz_madd_epi16(0, a, b);
16271        assert_eq_m512i(r, _mm512_setzero_si512());
16272        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
16273        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
16274        assert_eq_m512i(r, e);
16275    }
16276
16277    #[simd_test(enable = "avx512bw,avx512vl")]
16278    unsafe fn test_mm256_mask_madd_epi16() {
16279        let a = _mm256_set1_epi16(1);
16280        let b = _mm256_set1_epi16(1);
16281        let r = _mm256_mask_madd_epi16(a, 0, a, b);
16282        assert_eq_m256i(r, a);
16283        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
16284        let e = _mm256_set_epi32(
16285            1 << 16 | 1,
16286            1 << 16 | 1,
16287            1 << 16 | 1,
16288            1 << 16 | 1,
16289            2,
16290            2,
16291            2,
16292            2,
16293        );
16294        assert_eq_m256i(r, e);
16295    }
16296
16297    #[simd_test(enable = "avx512bw,avx512vl")]
16298    unsafe fn test_mm256_maskz_madd_epi16() {
16299        let a = _mm256_set1_epi16(1);
16300        let b = _mm256_set1_epi16(1);
16301        let r = _mm256_maskz_madd_epi16(0, a, b);
16302        assert_eq_m256i(r, _mm256_setzero_si256());
16303        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
16304        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
16305        assert_eq_m256i(r, e);
16306    }
16307
16308    #[simd_test(enable = "avx512bw,avx512vl")]
16309    unsafe fn test_mm_mask_madd_epi16() {
16310        let a = _mm_set1_epi16(1);
16311        let b = _mm_set1_epi16(1);
16312        let r = _mm_mask_madd_epi16(a, 0, a, b);
16313        assert_eq_m128i(r, a);
16314        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
16315        let e = _mm_set_epi32(2, 2, 2, 2);
16316        assert_eq_m128i(r, e);
16317    }
16318
16319    #[simd_test(enable = "avx512bw,avx512vl")]
16320    unsafe fn test_mm_maskz_madd_epi16() {
16321        let a = _mm_set1_epi16(1);
16322        let b = _mm_set1_epi16(1);
16323        let r = _mm_maskz_madd_epi16(0, a, b);
16324        assert_eq_m128i(r, _mm_setzero_si128());
16325        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
16326        let e = _mm_set_epi32(2, 2, 2, 2);
16327        assert_eq_m128i(r, e);
16328    }
16329
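    // The maddubs tests exercise vpmaddubsw: unsigned 8-bit elements of `a`
    // are multiplied by signed 8-bit elements of `b`, and adjacent pairs are
    // summed with signed saturation into 16-bit lanes; with all-ones inputs
    // each result lane is 1*1 + 1*1 = 2.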
16330    #[simd_test(enable = "avx512bw")]
16331    unsafe fn test_mm512_maddubs_epi16() {
16332        let a = _mm512_set1_epi8(1);
16333        let b = _mm512_set1_epi8(1);
16334        let r = _mm512_maddubs_epi16(a, b);
16335        let e = _mm512_set1_epi16(2);
16336        assert_eq_m512i(r, e);
16337    }
16338
16339    #[simd_test(enable = "avx512bw")]
16340    unsafe fn test_mm512_mask_maddubs_epi16() {
16341        let a = _mm512_set1_epi8(1);
16342        let b = _mm512_set1_epi8(1);
16343        let src = _mm512_set1_epi16(1);
16344        let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
16345        assert_eq_m512i(r, src);
16346        let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
16347        #[rustfmt::skip]
16348        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16349                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
16350        assert_eq_m512i(r, e);
16351    }
16352
16353    #[simd_test(enable = "avx512bw")]
16354    unsafe fn test_mm512_maskz_maddubs_epi16() {
16355        let a = _mm512_set1_epi8(1);
16356        let b = _mm512_set1_epi8(1);
16357        let r = _mm512_maskz_maddubs_epi16(0, a, b);
16358        assert_eq_m512i(r, _mm512_setzero_si512());
16359        let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
16360        #[rustfmt::skip]
16361        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
16362                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
16363        assert_eq_m512i(r, e);
16364    }
16365
16366    #[simd_test(enable = "avx512bw,avx512vl")]
16367    unsafe fn test_mm256_mask_maddubs_epi16() {
16368        let a = _mm256_set1_epi8(1);
16369        let b = _mm256_set1_epi8(1);
16370        let src = _mm256_set1_epi16(1);
16371        let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
16372        assert_eq_m256i(r, src);
16373        let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
16374        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
16375        assert_eq_m256i(r, e);
16376    }
16377
16378    #[simd_test(enable = "avx512bw,avx512vl")]
16379    unsafe fn test_mm256_maskz_maddubs_epi16() {
16380        let a = _mm256_set1_epi8(1);
16381        let b = _mm256_set1_epi8(1);
16382        let r = _mm256_maskz_maddubs_epi16(0, a, b);
16383        assert_eq_m256i(r, _mm256_setzero_si256());
16384        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
16385        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
16386        assert_eq_m256i(r, e);
16387    }
16388
16389    #[simd_test(enable = "avx512bw,avx512vl")]
16390    unsafe fn test_mm_mask_maddubs_epi16() {
16391        let a = _mm_set1_epi8(1);
16392        let b = _mm_set1_epi8(1);
16393        let src = _mm_set1_epi16(1);
16394        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
16395        assert_eq_m128i(r, src);
16396        let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
16397        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
16398        assert_eq_m128i(r, e);
16399    }
16400
16401    #[simd_test(enable = "avx512bw,avx512vl")]
16402    unsafe fn test_mm_maskz_maddubs_epi16() {
16403        let a = _mm_set1_epi8(1);
16404        let b = _mm_set1_epi8(1);
16405        let r = _mm_maskz_maddubs_epi16(0, a, b);
16406        assert_eq_m128i(r, _mm_setzero_si128());
16407        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
16408        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
16409        assert_eq_m128i(r, e);
16410    }
16411
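    // packs_epi32 (vpackssdw) narrows signed 32-bit lanes to signed 16-bit
    // with saturation, interleaving `a` and `b` within each 128-bit lane;
    // i32::MAX saturates to i16::MAX, which explains the alternating groups
    // of four below.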
16412    #[simd_test(enable = "avx512bw")]
16413    unsafe fn test_mm512_packs_epi32() {
16414        let a = _mm512_set1_epi32(i32::MAX);
16415        let b = _mm512_set1_epi32(1);
16416        let r = _mm512_packs_epi32(a, b);
16417        #[rustfmt::skip]
16418        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
16419                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16420        assert_eq_m512i(r, e);
16421    }
16422
16423    #[simd_test(enable = "avx512bw")]
16424    unsafe fn test_mm512_mask_packs_epi32() {
16425        let a = _mm512_set1_epi32(i32::MAX);
16426        let b = _mm512_set1_epi32(1 << 16 | 1);
16427        let r = _mm512_mask_packs_epi32(a, 0, a, b);
16428        assert_eq_m512i(r, a);
16429        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
16430        #[rustfmt::skip]
16431        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16432                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16433        assert_eq_m512i(r, e);
16434    }
16435
16436    #[simd_test(enable = "avx512bw")]
16437    unsafe fn test_mm512_maskz_packs_epi32() {
16438        let a = _mm512_set1_epi32(i32::MAX);
16439        let b = _mm512_set1_epi32(1);
16440        let r = _mm512_maskz_packs_epi32(0, a, b);
16441        assert_eq_m512i(r, _mm512_setzero_si512());
16442        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
16443        #[rustfmt::skip]
16444        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16445                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16446        assert_eq_m512i(r, e);
16447    }
16448
16449    #[simd_test(enable = "avx512bw,avx512vl")]
16450    unsafe fn test_mm256_mask_packs_epi32() {
16451        let a = _mm256_set1_epi32(i32::MAX);
16452        let b = _mm256_set1_epi32(1 << 16 | 1);
16453        let r = _mm256_mask_packs_epi32(a, 0, a, b);
16454        assert_eq_m256i(r, a);
16455        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
16456        #[rustfmt::skip]
16457        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16458        assert_eq_m256i(r, e);
16459    }
16460
16461    #[simd_test(enable = "avx512bw,avx512vl")]
16462    unsafe fn test_mm256_maskz_packs_epi32() {
16463        let a = _mm256_set1_epi32(i32::MAX);
16464        let b = _mm256_set1_epi32(1);
16465        let r = _mm256_maskz_packs_epi32(0, a, b);
16466        assert_eq_m256i(r, _mm256_setzero_si256());
16467        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
16468        #[rustfmt::skip]
16469        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16470        assert_eq_m256i(r, e);
16471    }
16472
16473    #[simd_test(enable = "avx512bw,avx512vl")]
16474    unsafe fn test_mm_mask_packs_epi32() {
16475        let a = _mm_set1_epi32(i32::MAX);
16476        let b = _mm_set1_epi32(1 << 16 | 1);
16477        let r = _mm_mask_packs_epi32(a, 0, a, b);
16478        assert_eq_m128i(r, a);
16479        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
16480        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16481        assert_eq_m128i(r, e);
16482    }
16483
16484    #[simd_test(enable = "avx512bw,avx512vl")]
16485    unsafe fn test_mm_maskz_packs_epi32() {
16486        let a = _mm_set1_epi32(i32::MAX);
16487        let b = _mm_set1_epi32(1);
16488        let r = _mm_maskz_packs_epi32(0, a, b);
16489        assert_eq_m128i(r, _mm_setzero_si128());
16490        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
16491        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
16492        assert_eq_m128i(r, e);
16493    }
16494
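    // packs_epi16 (vpacksswb) is the 16-to-8-bit analogue: i16::MAX
    // saturates to i8::MAX, again interleaved per 128-bit lane.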
16495    #[simd_test(enable = "avx512bw")]
16496    unsafe fn test_mm512_packs_epi16() {
16497        let a = _mm512_set1_epi16(i16::MAX);
16498        let b = _mm512_set1_epi16(1);
16499        let r = _mm512_packs_epi16(a, b);
16500        #[rustfmt::skip]
16501        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16502                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16503                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
16504                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16505        assert_eq_m512i(r, e);
16506    }
16507
16508    #[simd_test(enable = "avx512bw")]
16509    unsafe fn test_mm512_mask_packs_epi16() {
16510        let a = _mm512_set1_epi16(i16::MAX);
16511        let b = _mm512_set1_epi16(1 << 8 | 1);
16512        let r = _mm512_mask_packs_epi16(a, 0, a, b);
16513        assert_eq_m512i(r, a);
16514        let r = _mm512_mask_packs_epi16(
16515            b,
16516            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16517            a,
16518            b,
16519        );
16520        #[rustfmt::skip]
16521        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16522                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16523                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16524                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16525        assert_eq_m512i(r, e);
16526    }
16527
16528    #[simd_test(enable = "avx512bw")]
16529    unsafe fn test_mm512_maskz_packs_epi16() {
16530        let a = _mm512_set1_epi16(i16::MAX);
16531        let b = _mm512_set1_epi16(1);
16532        let r = _mm512_maskz_packs_epi16(0, a, b);
16533        assert_eq_m512i(r, _mm512_setzero_si512());
16534        let r = _mm512_maskz_packs_epi16(
16535            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16536            a,
16537            b,
16538        );
16539        #[rustfmt::skip]
16540        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16541                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16542                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16543                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16544        assert_eq_m512i(r, e);
16545    }
16546
16547    #[simd_test(enable = "avx512bw,avx512vl")]
16548    unsafe fn test_mm256_mask_packs_epi16() {
16549        let a = _mm256_set1_epi16(i16::MAX);
16550        let b = _mm256_set1_epi16(1 << 8 | 1);
16551        let r = _mm256_mask_packs_epi16(a, 0, a, b);
16552        assert_eq_m256i(r, a);
16553        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
16554        #[rustfmt::skip]
16555        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16556                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16557        assert_eq_m256i(r, e);
16558    }
16559
16560    #[simd_test(enable = "avx512bw,avx512vl")]
16561    unsafe fn test_mm256_maskz_packs_epi16() {
16562        let a = _mm256_set1_epi16(i16::MAX);
16563        let b = _mm256_set1_epi16(1);
16564        let r = _mm256_maskz_packs_epi16(0, a, b);
16565        assert_eq_m256i(r, _mm256_setzero_si256());
16566        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
16567        #[rustfmt::skip]
16568        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16569                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16570        assert_eq_m256i(r, e);
16571    }
16572
16573    #[simd_test(enable = "avx512bw,avx512vl")]
16574    unsafe fn test_mm_mask_packs_epi16() {
16575        let a = _mm_set1_epi16(i16::MAX);
16576        let b = _mm_set1_epi16(1 << 8 | 1);
16577        let r = _mm_mask_packs_epi16(a, 0, a, b);
16578        assert_eq_m128i(r, a);
16579        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
16580        #[rustfmt::skip]
16581        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16582        assert_eq_m128i(r, e);
16583    }
16584
16585    #[simd_test(enable = "avx512bw,avx512vl")]
16586    unsafe fn test_mm_maskz_packs_epi16() {
16587        let a = _mm_set1_epi16(i16::MAX);
16588        let b = _mm_set1_epi16(1);
16589        let r = _mm_maskz_packs_epi16(0, a, b);
16590        assert_eq_m128i(r, _mm_setzero_si128());
16591        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
16592        #[rustfmt::skip]
16593        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
16594        assert_eq_m128i(r, e);
16595    }
16596
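    // packus_epi32 (vpackusdw) narrows with *unsigned* saturation, so the
    // all-negative input (-1) clamps to 0 rather than wrapping.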
16597    #[simd_test(enable = "avx512bw")]
16598    unsafe fn test_mm512_packus_epi32() {
16599        let a = _mm512_set1_epi32(-1);
16600        let b = _mm512_set1_epi32(1);
16601        let r = _mm512_packus_epi32(a, b);
16602        #[rustfmt::skip]
16603        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
16604                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
16605        assert_eq_m512i(r, e);
16606    }
16607
16608    #[simd_test(enable = "avx512bw")]
16609    unsafe fn test_mm512_mask_packus_epi32() {
16610        let a = _mm512_set1_epi32(-1);
16611        let b = _mm512_set1_epi32(1 << 16 | 1);
16612        let r = _mm512_mask_packus_epi32(a, 0, a, b);
16613        assert_eq_m512i(r, a);
16614        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
16615        #[rustfmt::skip]
16616        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16617                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
16618        assert_eq_m512i(r, e);
16619    }
16620
16621    #[simd_test(enable = "avx512bw")]
16622    unsafe fn test_mm512_maskz_packus_epi32() {
16623        let a = _mm512_set1_epi32(-1);
16624        let b = _mm512_set1_epi32(1);
16625        let r = _mm512_maskz_packus_epi32(0, a, b);
16626        assert_eq_m512i(r, _mm512_setzero_si512());
16627        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
16628        #[rustfmt::skip]
16629        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16630                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
16631        assert_eq_m512i(r, e);
16632    }
16633
16634    #[simd_test(enable = "avx512bw,avx512vl")]
16635    unsafe fn test_mm256_mask_packus_epi32() {
16636        let a = _mm256_set1_epi32(-1);
16637        let b = _mm256_set1_epi32(1 << 16 | 1);
16638        let r = _mm256_mask_packus_epi32(a, 0, a, b);
16639        assert_eq_m256i(r, a);
16640        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
16641        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
16642        assert_eq_m256i(r, e);
16643    }
16644
16645    #[simd_test(enable = "avx512bw,avx512vl")]
16646    unsafe fn test_mm256_maskz_packus_epi32() {
16647        let a = _mm256_set1_epi32(-1);
16648        let b = _mm256_set1_epi32(1);
16649        let r = _mm256_maskz_packus_epi32(0, a, b);
16650        assert_eq_m256i(r, _mm256_setzero_si256());
16651        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
16652        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
16653        assert_eq_m256i(r, e);
16654    }
16655
16656    #[simd_test(enable = "avx512bw,avx512vl")]
16657    unsafe fn test_mm_mask_packus_epi32() {
16658        let a = _mm_set1_epi32(-1);
16659        let b = _mm_set1_epi32(1 << 16 | 1);
16660        let r = _mm_mask_packus_epi32(a, 0, a, b);
16661        assert_eq_m128i(r, a);
16662        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
16663        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
16664        assert_eq_m128i(r, e);
16665    }
16666
16667    #[simd_test(enable = "avx512bw,avx512vl")]
16668    unsafe fn test_mm_maskz_packus_epi32() {
16669        let a = _mm_set1_epi32(-1);
16670        let b = _mm_set1_epi32(1);
16671        let r = _mm_maskz_packus_epi32(0, a, b);
16672        assert_eq_m128i(r, _mm_setzero_si128());
16673        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
16674        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
16675        assert_eq_m128i(r, e);
16676    }
16677
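    // packus_epi16 (vpackuswb) narrows 16-bit lanes to unsigned 8-bit; -1
    // again clamps to 0.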
16678    #[simd_test(enable = "avx512bw")]
16679    unsafe fn test_mm512_packus_epi16() {
16680        let a = _mm512_set1_epi16(-1);
16681        let b = _mm512_set1_epi16(1);
16682        let r = _mm512_packus_epi16(a, b);
16683        #[rustfmt::skip]
16684        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
16685                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
16686                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
16687                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
16688        assert_eq_m512i(r, e);
16689    }
16690
16691    #[simd_test(enable = "avx512bw")]
16692    unsafe fn test_mm512_mask_packus_epi16() {
16693        let a = _mm512_set1_epi16(-1);
16694        let b = _mm512_set1_epi16(1 << 8 | 1);
16695        let r = _mm512_mask_packus_epi16(a, 0, a, b);
16696        assert_eq_m512i(r, a);
16697        let r = _mm512_mask_packus_epi16(
16698            b,
16699            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16700            a,
16701            b,
16702        );
16703        #[rustfmt::skip]
16704        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16705                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16706                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16707                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
16708        assert_eq_m512i(r, e);
16709    }
16710
16711    #[simd_test(enable = "avx512bw")]
16712    unsafe fn test_mm512_maskz_packus_epi16() {
16713        let a = _mm512_set1_epi16(-1);
16714        let b = _mm512_set1_epi16(1);
16715        let r = _mm512_maskz_packus_epi16(0, a, b);
16716        assert_eq_m512i(r, _mm512_setzero_si512());
16717        let r = _mm512_maskz_packus_epi16(
16718            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16719            a,
16720            b,
16721        );
16722        #[rustfmt::skip]
16723        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16724                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16725                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16726                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
16727        assert_eq_m512i(r, e);
16728    }
16729
16730    #[simd_test(enable = "avx512bw,avx512vl")]
16731    unsafe fn test_mm256_mask_packus_epi16() {
16732        let a = _mm256_set1_epi16(-1);
16733        let b = _mm256_set1_epi16(1 << 8 | 1);
16734        let r = _mm256_mask_packus_epi16(a, 0, a, b);
16735        assert_eq_m256i(r, a);
16736        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
16737        #[rustfmt::skip]
16738        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16739                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
16740        assert_eq_m256i(r, e);
16741    }
16742
16743    #[simd_test(enable = "avx512bw,avx512vl")]
16744    unsafe fn test_mm256_maskz_packus_epi16() {
16745        let a = _mm256_set1_epi16(-1);
16746        let b = _mm256_set1_epi16(1);
16747        let r = _mm256_maskz_packus_epi16(0, a, b);
16748        assert_eq_m256i(r, _mm256_setzero_si256());
16749        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
16750        #[rustfmt::skip]
16751        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16752                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
16753        assert_eq_m256i(r, e);
16754    }
16755
16756    #[simd_test(enable = "avx512bw,avx512vl")]
16757    unsafe fn test_mm_mask_packus_epi16() {
16758        let a = _mm_set1_epi16(-1);
16759        let b = _mm_set1_epi16(1 << 8 | 1);
16760        let r = _mm_mask_packus_epi16(a, 0, a, b);
16761        assert_eq_m128i(r, a);
16762        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
16763        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
16764        assert_eq_m128i(r, e);
16765    }
16766
16767    #[simd_test(enable = "avx512bw,avx512vl")]
16768    unsafe fn test_mm_maskz_packus_epi16() {
16769        let a = _mm_set1_epi16(-1);
16770        let b = _mm_set1_epi16(1);
16771        let r = _mm_maskz_packus_epi16(0, a, b);
16772        assert_eq_m128i(r, _mm_setzero_si128());
16773        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
16774        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
16775        assert_eq_m128i(r, e);
16776    }
16777
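    // avg_epu16 computes the rounding average (a + b + 1) >> 1 per unsigned
    // 16-bit lane, so avg(1, 1) == 1.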
16778    #[simd_test(enable = "avx512bw")]
16779    unsafe fn test_mm512_avg_epu16() {
16780        let a = _mm512_set1_epi16(1);
16781        let b = _mm512_set1_epi16(1);
16782        let r = _mm512_avg_epu16(a, b);
16783        let e = _mm512_set1_epi16(1);
16784        assert_eq_m512i(r, e);
16785    }
16786
16787    #[simd_test(enable = "avx512bw")]
16788    unsafe fn test_mm512_mask_avg_epu16() {
16789        let a = _mm512_set1_epi16(1);
16790        let b = _mm512_set1_epi16(1);
16791        let r = _mm512_mask_avg_epu16(a, 0, a, b);
16792        assert_eq_m512i(r, a);
16793        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
16794        #[rustfmt::skip]
16795        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16796                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
16797        assert_eq_m512i(r, e);
16798    }
16799
16800    #[simd_test(enable = "avx512bw")]
16801    unsafe fn test_mm512_maskz_avg_epu16() {
16802        let a = _mm512_set1_epi16(1);
16803        let b = _mm512_set1_epi16(1);
16804        let r = _mm512_maskz_avg_epu16(0, a, b);
16805        assert_eq_m512i(r, _mm512_setzero_si512());
16806        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
16807        #[rustfmt::skip]
16808        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16809                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
16810        assert_eq_m512i(r, e);
16811    }
16812
16813    #[simd_test(enable = "avx512bw,avx512vl")]
16814    unsafe fn test_mm256_mask_avg_epu16() {
16815        let a = _mm256_set1_epi16(1);
16816        let b = _mm256_set1_epi16(1);
16817        let r = _mm256_mask_avg_epu16(a, 0, a, b);
16818        assert_eq_m256i(r, a);
16819        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
16820        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
16821        assert_eq_m256i(r, e);
16822    }
16823
16824    #[simd_test(enable = "avx512bw,avx512vl")]
16825    unsafe fn test_mm256_maskz_avg_epu16() {
16826        let a = _mm256_set1_epi16(1);
16827        let b = _mm256_set1_epi16(1);
16828        let r = _mm256_maskz_avg_epu16(0, a, b);
16829        assert_eq_m256i(r, _mm256_setzero_si256());
16830        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
16831        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
16832        assert_eq_m256i(r, e);
16833    }
16834
16835    #[simd_test(enable = "avx512bw,avx512vl")]
16836    unsafe fn test_mm_mask_avg_epu16() {
16837        let a = _mm_set1_epi16(1);
16838        let b = _mm_set1_epi16(1);
16839        let r = _mm_mask_avg_epu16(a, 0, a, b);
16840        assert_eq_m128i(r, a);
16841        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
16842        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
16843        assert_eq_m128i(r, e);
16844    }
16845
16846    #[simd_test(enable = "avx512bw,avx512vl")]
16847    unsafe fn test_mm_maskz_avg_epu16() {
16848        let a = _mm_set1_epi16(1);
16849        let b = _mm_set1_epi16(1);
16850        let r = _mm_maskz_avg_epu16(0, a, b);
16851        assert_eq_m128i(r, _mm_setzero_si128());
16852        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
16853        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
16854        assert_eq_m128i(r, e);
16855    }
16856
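    // avg_epu8 is the 8-bit rounding average: (a + b + 1) >> 1 per lane.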
16857    #[simd_test(enable = "avx512bw")]
16858    unsafe fn test_mm512_avg_epu8() {
16859        let a = _mm512_set1_epi8(1);
16860        let b = _mm512_set1_epi8(1);
16861        let r = _mm512_avg_epu8(a, b);
16862        let e = _mm512_set1_epi8(1);
16863        assert_eq_m512i(r, e);
16864    }
16865
16866    #[simd_test(enable = "avx512bw")]
16867    unsafe fn test_mm512_mask_avg_epu8() {
16868        let a = _mm512_set1_epi8(1);
16869        let b = _mm512_set1_epi8(1);
16870        let r = _mm512_mask_avg_epu8(a, 0, a, b);
16871        assert_eq_m512i(r, a);
16872        let r = _mm512_mask_avg_epu8(
16873            a,
16874            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16875            a,
16876            b,
16877        );
16878        #[rustfmt::skip]
16879        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16880                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16881                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16882                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
16883        assert_eq_m512i(r, e);
16884    }
16885
16886    #[simd_test(enable = "avx512bw")]
16887    unsafe fn test_mm512_maskz_avg_epu8() {
16888        let a = _mm512_set1_epi8(1);
16889        let b = _mm512_set1_epi8(1);
16890        let r = _mm512_maskz_avg_epu8(0, a, b);
16891        assert_eq_m512i(r, _mm512_setzero_si512());
16892        let r = _mm512_maskz_avg_epu8(
16893            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
16894            a,
16895            b,
16896        );
16897        #[rustfmt::skip]
16898        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16899                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16900                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16901                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
16902        assert_eq_m512i(r, e);
16903    }
16904
16905    #[simd_test(enable = "avx512bw,avx512vl")]
16906    unsafe fn test_mm256_mask_avg_epu8() {
16907        let a = _mm256_set1_epi8(1);
16908        let b = _mm256_set1_epi8(1);
16909        let r = _mm256_mask_avg_epu8(a, 0, a, b);
16910        assert_eq_m256i(r, a);
16911        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
16912        #[rustfmt::skip]
16913        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16914                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
16915        assert_eq_m256i(r, e);
16916    }
16917
16918    #[simd_test(enable = "avx512bw,avx512vl")]
16919    unsafe fn test_mm256_maskz_avg_epu8() {
16920        let a = _mm256_set1_epi8(1);
16921        let b = _mm256_set1_epi8(1);
16922        let r = _mm256_maskz_avg_epu8(0, a, b);
16923        assert_eq_m256i(r, _mm256_setzero_si256());
16924        let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
16925        #[rustfmt::skip]
16926        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16927                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
16928        assert_eq_m256i(r, e);
16929    }
16930
16931    #[simd_test(enable = "avx512bw,avx512vl")]
16932    unsafe fn test_mm_mask_avg_epu8() {
16933        let a = _mm_set1_epi8(1);
16934        let b = _mm_set1_epi8(1);
16935        let r = _mm_mask_avg_epu8(a, 0, a, b);
16936        assert_eq_m128i(r, a);
16937        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
16938        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
16939        assert_eq_m128i(r, e);
16940    }
16941
16942    #[simd_test(enable = "avx512bw,avx512vl")]
16943    unsafe fn test_mm_maskz_avg_epu8() {
16944        let a = _mm_set1_epi8(1);
16945        let b = _mm_set1_epi8(1);
16946        let r = _mm_maskz_avg_epu8(0, a, b);
16947        assert_eq_m128i(r, _mm_setzero_si128());
16948        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
16949        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
16950        assert_eq_m128i(r, e);
16951    }
16952
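    // For the sll/srl/sra forms (as opposed to the slli/srli/srai immediates
    // and the sllv/srlv/srav per-element variants), Intel's documentation
    // specifies that the shift count is read from the low 64 bits of the
    // __m128i `count` argument. `_mm_set1_epi16(2)` therefore encodes the
    // count 0x0002_0002_0002_0002, which is >= 16, so every 16-bit lane is
    // shifted out entirely and the expected result is all zeros.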
16953    #[simd_test(enable = "avx512bw")]
16954    unsafe fn test_mm512_sll_epi16() {
16955        let a = _mm512_set1_epi16(1 << 15);
16956        let count = _mm_set1_epi16(2);
16957        let r = _mm512_sll_epi16(a, count);
16958        let e = _mm512_set1_epi16(0);
16959        assert_eq_m512i(r, e);
16960    }
16961
16962    #[simd_test(enable = "avx512bw")]
16963    unsafe fn test_mm512_mask_sll_epi16() {
16964        let a = _mm512_set1_epi16(1 << 15);
16965        let count = _mm_set1_epi16(2);
16966        let r = _mm512_mask_sll_epi16(a, 0, a, count);
16967        assert_eq_m512i(r, a);
16968        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
16969        let e = _mm512_set1_epi16(0);
16970        assert_eq_m512i(r, e);
16971    }
16972
16973    #[simd_test(enable = "avx512bw")]
16974    unsafe fn test_mm512_maskz_sll_epi16() {
16975        let a = _mm512_set1_epi16(1 << 15);
16976        let count = _mm_set1_epi16(2);
16977        let r = _mm512_maskz_sll_epi16(0, a, count);
16978        assert_eq_m512i(r, _mm512_setzero_si512());
16979        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
16980        let e = _mm512_set1_epi16(0);
16981        assert_eq_m512i(r, e);
16982    }
16983
16984    #[simd_test(enable = "avx512bw,avx512vl")]
16985    unsafe fn test_mm256_mask_sll_epi16() {
16986        let a = _mm256_set1_epi16(1 << 15);
16987        let count = _mm_set1_epi16(2);
16988        let r = _mm256_mask_sll_epi16(a, 0, a, count);
16989        assert_eq_m256i(r, a);
16990        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
16991        let e = _mm256_set1_epi16(0);
16992        assert_eq_m256i(r, e);
16993    }
16994
16995    #[simd_test(enable = "avx512bw,avx512vl")]
16996    unsafe fn test_mm256_maskz_sll_epi16() {
16997        let a = _mm256_set1_epi16(1 << 15);
16998        let count = _mm_set1_epi16(2);
16999        let r = _mm256_maskz_sll_epi16(0, a, count);
17000        assert_eq_m256i(r, _mm256_setzero_si256());
17001        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
17002        let e = _mm256_set1_epi16(0);
17003        assert_eq_m256i(r, e);
17004    }
17005
17006    #[simd_test(enable = "avx512bw,avx512vl")]
17007    unsafe fn test_mm_mask_sll_epi16() {
17008        let a = _mm_set1_epi16(1 << 15);
17009        let count = _mm_set1_epi16(2);
17010        let r = _mm_mask_sll_epi16(a, 0, a, count);
17011        assert_eq_m128i(r, a);
17012        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
17013        let e = _mm_set1_epi16(0);
17014        assert_eq_m128i(r, e);
17015    }
17016
17017    #[simd_test(enable = "avx512bw,avx512vl")]
17018    unsafe fn test_mm_maskz_sll_epi16() {
17019        let a = _mm_set1_epi16(1 << 15);
17020        let count = _mm_set1_epi16(2);
17021        let r = _mm_maskz_sll_epi16(0, a, count);
17022        assert_eq_m128i(r, _mm_setzero_si128());
17023        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
17024        let e = _mm_set1_epi16(0);
17025        assert_eq_m128i(r, e);
17026    }
17027
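    // slli takes the shift count as a const generic; shifting 1 << 15 left
    // by one drops the only set bit, giving 0.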
17028    #[simd_test(enable = "avx512bw")]
17029    unsafe fn test_mm512_slli_epi16() {
17030        let a = _mm512_set1_epi16(1 << 15);
17031        let r = _mm512_slli_epi16::<1>(a);
17032        let e = _mm512_set1_epi16(0);
17033        assert_eq_m512i(r, e);
17034    }
17035
17036    #[simd_test(enable = "avx512bw")]
17037    unsafe fn test_mm512_mask_slli_epi16() {
17038        let a = _mm512_set1_epi16(1 << 15);
17039        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
17040        assert_eq_m512i(r, a);
17041        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
17042        let e = _mm512_set1_epi16(0);
17043        assert_eq_m512i(r, e);
17044    }
17045
17046    #[simd_test(enable = "avx512bw")]
17047    unsafe fn test_mm512_maskz_slli_epi16() {
17048        let a = _mm512_set1_epi16(1 << 15);
17049        let r = _mm512_maskz_slli_epi16::<1>(0, a);
17050        assert_eq_m512i(r, _mm512_setzero_si512());
17051        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
17052        let e = _mm512_set1_epi16(0);
17053        assert_eq_m512i(r, e);
17054    }
17055
17056    #[simd_test(enable = "avx512bw,avx512vl")]
17057    unsafe fn test_mm256_mask_slli_epi16() {
17058        let a = _mm256_set1_epi16(1 << 15);
17059        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
17060        assert_eq_m256i(r, a);
17061        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
17062        let e = _mm256_set1_epi16(0);
17063        assert_eq_m256i(r, e);
17064    }
17065
17066    #[simd_test(enable = "avx512bw,avx512vl")]
17067    unsafe fn test_mm256_maskz_slli_epi16() {
17068        let a = _mm256_set1_epi16(1 << 15);
17069        let r = _mm256_maskz_slli_epi16::<1>(0, a);
17070        assert_eq_m256i(r, _mm256_setzero_si256());
17071        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
17072        let e = _mm256_set1_epi16(0);
17073        assert_eq_m256i(r, e);
17074    }
17075
17076    #[simd_test(enable = "avx512bw,avx512vl")]
17077    unsafe fn test_mm_mask_slli_epi16() {
17078        let a = _mm_set1_epi16(1 << 15);
17079        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
17080        assert_eq_m128i(r, a);
17081        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
17082        let e = _mm_set1_epi16(0);
17083        assert_eq_m128i(r, e);
17084    }
17085
17086    #[simd_test(enable = "avx512bw,avx512vl")]
17087    unsafe fn test_mm_maskz_slli_epi16() {
17088        let a = _mm_set1_epi16(1 << 15);
17089        let r = _mm_maskz_slli_epi16::<1>(0, a);
17090        assert_eq_m128i(r, _mm_setzero_si128());
17091        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
17092        let e = _mm_set1_epi16(0);
17093        assert_eq_m128i(r, e);
17094    }
17095
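    // sllv shifts each 16-bit lane by the count in the corresponding lane of
    // `count` (per-element, unlike sll's single 64-bit count above).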
17096    #[simd_test(enable = "avx512bw")]
17097    unsafe fn test_mm512_sllv_epi16() {
17098        let a = _mm512_set1_epi16(1 << 15);
17099        let count = _mm512_set1_epi16(2);
17100        let r = _mm512_sllv_epi16(a, count);
17101        let e = _mm512_set1_epi16(0);
17102        assert_eq_m512i(r, e);
17103    }
17104
17105    #[simd_test(enable = "avx512bw")]
17106    unsafe fn test_mm512_mask_sllv_epi16() {
17107        let a = _mm512_set1_epi16(1 << 15);
17108        let count = _mm512_set1_epi16(2);
17109        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
17110        assert_eq_m512i(r, a);
17111        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17112        let e = _mm512_set1_epi16(0);
17113        assert_eq_m512i(r, e);
17114    }
17115
17116    #[simd_test(enable = "avx512bw")]
17117    unsafe fn test_mm512_maskz_sllv_epi16() {
17118        let a = _mm512_set1_epi16(1 << 15);
17119        let count = _mm512_set1_epi16(2);
17120        let r = _mm512_maskz_sllv_epi16(0, a, count);
17121        assert_eq_m512i(r, _mm512_setzero_si512());
17122        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
17123        let e = _mm512_set1_epi16(0);
17124        assert_eq_m512i(r, e);
17125    }
17126
17127    #[simd_test(enable = "avx512bw,avx512vl")]
17128    unsafe fn test_mm256_sllv_epi16() {
17129        let a = _mm256_set1_epi16(1 << 15);
17130        let count = _mm256_set1_epi16(2);
17131        let r = _mm256_sllv_epi16(a, count);
17132        let e = _mm256_set1_epi16(0);
17133        assert_eq_m256i(r, e);
17134    }
17135
17136    #[simd_test(enable = "avx512bw,avx512vl")]
17137    unsafe fn test_mm256_mask_sllv_epi16() {
17138        let a = _mm256_set1_epi16(1 << 15);
17139        let count = _mm256_set1_epi16(2);
17140        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
17141        assert_eq_m256i(r, a);
17142        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
17143        let e = _mm256_set1_epi16(0);
17144        assert_eq_m256i(r, e);
17145    }
17146
17147    #[simd_test(enable = "avx512bw,avx512vl")]
17148    unsafe fn test_mm256_maskz_sllv_epi16() {
17149        let a = _mm256_set1_epi16(1 << 15);
17150        let count = _mm256_set1_epi16(2);
17151        let r = _mm256_maskz_sllv_epi16(0, a, count);
17152        assert_eq_m256i(r, _mm256_setzero_si256());
17153        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
17154        let e = _mm256_set1_epi16(0);
17155        assert_eq_m256i(r, e);
17156    }
17157
17158    #[simd_test(enable = "avx512bw,avx512vl")]
17159    unsafe fn test_mm_sllv_epi16() {
17160        let a = _mm_set1_epi16(1 << 15);
17161        let count = _mm_set1_epi16(2);
17162        let r = _mm_sllv_epi16(a, count);
17163        let e = _mm_set1_epi16(0);
17164        assert_eq_m128i(r, e);
17165    }
17166
17167    #[simd_test(enable = "avx512bw,avx512vl")]
17168    unsafe fn test_mm_mask_sllv_epi16() {
17169        let a = _mm_set1_epi16(1 << 15);
17170        let count = _mm_set1_epi16(2);
17171        let r = _mm_mask_sllv_epi16(a, 0, a, count);
17172        assert_eq_m128i(r, a);
17173        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
17174        let e = _mm_set1_epi16(0);
17175        assert_eq_m128i(r, e);
17176    }
17177
17178    #[simd_test(enable = "avx512bw,avx512vl")]
17179    unsafe fn test_mm_maskz_sllv_epi16() {
17180        let a = _mm_set1_epi16(1 << 15);
17181        let count = _mm_set1_epi16(2);
17182        let r = _mm_maskz_sllv_epi16(0, a, count);
17183        assert_eq_m128i(r, _mm_setzero_si128());
17184        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
17185        let e = _mm_set1_epi16(0);
17186        assert_eq_m128i(r, e);
17187    }
17188
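    // srl is the logical right shift; as with sll above, the count comes from
    // the low 64 bits of `count`, so `_mm_set1_epi16(2)` zeroes every lane.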
17189    #[simd_test(enable = "avx512bw")]
17190    unsafe fn test_mm512_srl_epi16() {
17191        let a = _mm512_set1_epi16(1 << 1);
17192        let count = _mm_set1_epi16(2);
17193        let r = _mm512_srl_epi16(a, count);
17194        let e = _mm512_set1_epi16(0);
17195        assert_eq_m512i(r, e);
17196    }
17197
17198    #[simd_test(enable = "avx512bw")]
17199    unsafe fn test_mm512_mask_srl_epi16() {
17200        let a = _mm512_set1_epi16(1 << 1);
17201        let count = _mm_set1_epi16(2);
17202        let r = _mm512_mask_srl_epi16(a, 0, a, count);
17203        assert_eq_m512i(r, a);
17204        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17205        let e = _mm512_set1_epi16(0);
17206        assert_eq_m512i(r, e);
17207    }
17208
17209    #[simd_test(enable = "avx512bw")]
17210    unsafe fn test_mm512_maskz_srl_epi16() {
17211        let a = _mm512_set1_epi16(1 << 1);
17212        let count = _mm_set1_epi16(2);
17213        let r = _mm512_maskz_srl_epi16(0, a, count);
17214        assert_eq_m512i(r, _mm512_setzero_si512());
17215        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
17216        let e = _mm512_set1_epi16(0);
17217        assert_eq_m512i(r, e);
17218    }
17219
17220    #[simd_test(enable = "avx512bw,avx512vl")]
17221    unsafe fn test_mm256_mask_srl_epi16() {
17222        let a = _mm256_set1_epi16(1 << 1);
17223        let count = _mm_set1_epi16(2);
17224        let r = _mm256_mask_srl_epi16(a, 0, a, count);
17225        assert_eq_m256i(r, a);
17226        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
17227        let e = _mm256_set1_epi16(0);
17228        assert_eq_m256i(r, e);
17229    }
17230
17231    #[simd_test(enable = "avx512bw,avx512vl")]
17232    unsafe fn test_mm256_maskz_srl_epi16() {
17233        let a = _mm256_set1_epi16(1 << 1);
17234        let count = _mm_set1_epi16(2);
17235        let r = _mm256_maskz_srl_epi16(0, a, count);
17236        assert_eq_m256i(r, _mm256_setzero_si256());
17237        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
17238        let e = _mm256_set1_epi16(0);
17239        assert_eq_m256i(r, e);
17240    }
17241
17242    #[simd_test(enable = "avx512bw,avx512vl")]
17243    unsafe fn test_mm_mask_srl_epi16() {
17244        let a = _mm_set1_epi16(1 << 1);
17245        let count = _mm_set1_epi16(2);
17246        let r = _mm_mask_srl_epi16(a, 0, a, count);
17247        assert_eq_m128i(r, a);
17248        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
17249        let e = _mm_set1_epi16(0);
17250        assert_eq_m128i(r, e);
17251    }
17252
17253    #[simd_test(enable = "avx512bw,avx512vl")]
17254    unsafe fn test_mm_maskz_srl_epi16() {
17255        let a = _mm_set1_epi16(1 << 1);
17256        let count = _mm_set1_epi16(2);
17257        let r = _mm_maskz_srl_epi16(0, a, count);
17258        assert_eq_m128i(r, _mm_setzero_si128());
17259        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
17260        let e = _mm_set1_epi16(0);
17261        assert_eq_m128i(r, e);
17262    }
17263
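    // srli: immediate logical right shift; (1 << 1) >> 2 == 0.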
17264    #[simd_test(enable = "avx512bw")]
17265    unsafe fn test_mm512_srli_epi16() {
17266        let a = _mm512_set1_epi16(1 << 1);
17267        let r = _mm512_srli_epi16::<2>(a);
17268        let e = _mm512_set1_epi16(0);
17269        assert_eq_m512i(r, e);
17270    }
17271
17272    #[simd_test(enable = "avx512bw")]
17273    unsafe fn test_mm512_mask_srli_epi16() {
17274        let a = _mm512_set1_epi16(1 << 1);
17275        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
17276        assert_eq_m512i(r, a);
17277        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
17278        let e = _mm512_set1_epi16(0);
17279        assert_eq_m512i(r, e);
17280    }
17281
17282    #[simd_test(enable = "avx512bw")]
17283    unsafe fn test_mm512_maskz_srli_epi16() {
17284        let a = _mm512_set1_epi16(1 << 1);
17285        let r = _mm512_maskz_srli_epi16::<2>(0, a);
17286        assert_eq_m512i(r, _mm512_setzero_si512());
17287        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
17288        let e = _mm512_set1_epi16(0);
17289        assert_eq_m512i(r, e);
17290    }
17291
17292    #[simd_test(enable = "avx512bw,avx512vl")]
17293    unsafe fn test_mm256_mask_srli_epi16() {
17294        let a = _mm256_set1_epi16(1 << 1);
17295        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
17296        assert_eq_m256i(r, a);
17297        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
17298        let e = _mm256_set1_epi16(0);
17299        assert_eq_m256i(r, e);
17300    }
17301
17302    #[simd_test(enable = "avx512bw,avx512vl")]
17303    unsafe fn test_mm256_maskz_srli_epi16() {
17304        let a = _mm256_set1_epi16(1 << 1);
17305        let r = _mm256_maskz_srli_epi16::<2>(0, a);
17306        assert_eq_m256i(r, _mm256_setzero_si256());
17307        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
17308        let e = _mm256_set1_epi16(0);
17309        assert_eq_m256i(r, e);
17310    }
17311
17312    #[simd_test(enable = "avx512bw,avx512vl")]
17313    unsafe fn test_mm_mask_srli_epi16() {
17314        let a = _mm_set1_epi16(1 << 1);
17315        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
17316        assert_eq_m128i(r, a);
17317        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
17318        let e = _mm_set1_epi16(0);
17319        assert_eq_m128i(r, e);
17320    }
17321
17322    #[simd_test(enable = "avx512bw,avx512vl")]
17323    unsafe fn test_mm_maskz_srli_epi16() {
17324        let a = _mm_set1_epi16(1 << 1);
17325        let r = _mm_maskz_srli_epi16::<2>(0, a);
17326        assert_eq_m128i(r, _mm_setzero_si128());
17327        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
17328        let e = _mm_set1_epi16(0);
17329        assert_eq_m128i(r, e);
17330    }
17331
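    // srlv: per-element logical right shift; each lane holding 1 << 1 is
    // shifted by 2, giving 0.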
17332    #[simd_test(enable = "avx512bw")]
17333    unsafe fn test_mm512_srlv_epi16() {
17334        let a = _mm512_set1_epi16(1 << 1);
17335        let count = _mm512_set1_epi16(2);
17336        let r = _mm512_srlv_epi16(a, count);
17337        let e = _mm512_set1_epi16(0);
17338        assert_eq_m512i(r, e);
17339    }
17340
17341    #[simd_test(enable = "avx512bw")]
17342    unsafe fn test_mm512_mask_srlv_epi16() {
17343        let a = _mm512_set1_epi16(1 << 1);
17344        let count = _mm512_set1_epi16(2);
17345        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
17346        assert_eq_m512i(r, a);
17347        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17348        let e = _mm512_set1_epi16(0);
17349        assert_eq_m512i(r, e);
17350    }
17351
17352    #[simd_test(enable = "avx512bw")]
17353    unsafe fn test_mm512_maskz_srlv_epi16() {
17354        let a = _mm512_set1_epi16(1 << 1);
17355        let count = _mm512_set1_epi16(2);
17356        let r = _mm512_maskz_srlv_epi16(0, a, count);
17357        assert_eq_m512i(r, _mm512_setzero_si512());
17358        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
17359        let e = _mm512_set1_epi16(0);
17360        assert_eq_m512i(r, e);
17361    }
17362
17363    #[simd_test(enable = "avx512bw,avx512vl")]
17364    unsafe fn test_mm256_srlv_epi16() {
17365        let a = _mm256_set1_epi16(1 << 1);
17366        let count = _mm256_set1_epi16(2);
17367        let r = _mm256_srlv_epi16(a, count);
17368        let e = _mm256_set1_epi16(0);
17369        assert_eq_m256i(r, e);
17370    }
17371
17372    #[simd_test(enable = "avx512bw,avx512vl")]
17373    unsafe fn test_mm256_mask_srlv_epi16() {
17374        let a = _mm256_set1_epi16(1 << 1);
17375        let count = _mm256_set1_epi16(2);
17376        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
17377        assert_eq_m256i(r, a);
17378        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
17379        let e = _mm256_set1_epi16(0);
17380        assert_eq_m256i(r, e);
17381    }
17382
17383    #[simd_test(enable = "avx512bw,avx512vl")]
17384    unsafe fn test_mm256_maskz_srlv_epi16() {
17385        let a = _mm256_set1_epi16(1 << 1);
17386        let count = _mm256_set1_epi16(2);
17387        let r = _mm256_maskz_srlv_epi16(0, a, count);
17388        assert_eq_m256i(r, _mm256_setzero_si256());
17389        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
17390        let e = _mm256_set1_epi16(0);
17391        assert_eq_m256i(r, e);
17392    }
17393
17394    #[simd_test(enable = "avx512bw,avx512vl")]
17395    unsafe fn test_mm_srlv_epi16() {
17396        let a = _mm_set1_epi16(1 << 1);
17397        let count = _mm_set1_epi16(2);
17398        let r = _mm_srlv_epi16(a, count);
17399        let e = _mm_set1_epi16(0);
17400        assert_eq_m128i(r, e);
17401    }
17402
17403    #[simd_test(enable = "avx512bw,avx512vl")]
17404    unsafe fn test_mm_mask_srlv_epi16() {
17405        let a = _mm_set1_epi16(1 << 1);
17406        let count = _mm_set1_epi16(2);
17407        let r = _mm_mask_srlv_epi16(a, 0, a, count);
17408        assert_eq_m128i(r, a);
17409        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
17410        let e = _mm_set1_epi16(0);
17411        assert_eq_m128i(r, e);
17412    }
17413
17414    #[simd_test(enable = "avx512bw,avx512vl")]
17415    unsafe fn test_mm_maskz_srlv_epi16() {
17416        let a = _mm_set1_epi16(1 << 1);
17417        let count = _mm_set1_epi16(2);
17418        let r = _mm_maskz_srlv_epi16(0, a, count);
17419        assert_eq_m128i(r, _mm_setzero_si128());
17420        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
17421        let e = _mm_set1_epi16(0);
17422        assert_eq_m128i(r, e);
17423    }
17424
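    // sra is the arithmetic right shift. The count again comes from the low
    // 64 bits of `count`, so `_mm_set1_epi16(1)` encodes a count >= 16: every
    // lane is filled with the sign bit, and the non-negative input 8 goes to
    // 0 (a lane-wise 8 >> 1 would have given 4 instead).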
17425    #[simd_test(enable = "avx512bw")]
17426    unsafe fn test_mm512_sra_epi16() {
17427        let a = _mm512_set1_epi16(8);
17428        let count = _mm_set1_epi16(1);
17429        let r = _mm512_sra_epi16(a, count);
17430        let e = _mm512_set1_epi16(0);
17431        assert_eq_m512i(r, e);
17432    }
17433
17434    #[simd_test(enable = "avx512bw")]
17435    unsafe fn test_mm512_mask_sra_epi16() {
17436        let a = _mm512_set1_epi16(8);
17437        let count = _mm_set1_epi16(1);
17438        let r = _mm512_mask_sra_epi16(a, 0, a, count);
17439        assert_eq_m512i(r, a);
17440        let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17441        let e = _mm512_set1_epi16(0);
17442        assert_eq_m512i(r, e);
17443    }
17444
17445    #[simd_test(enable = "avx512bw")]
17446    unsafe fn test_mm512_maskz_sra_epi16() {
17447        let a = _mm512_set1_epi16(8);
17448        let count = _mm_set1_epi16(1);
17449        let r = _mm512_maskz_sra_epi16(0, a, count);
17450        assert_eq_m512i(r, _mm512_setzero_si512());
17451        let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
17452        let e = _mm512_set1_epi16(0);
17453        assert_eq_m512i(r, e);
17454    }
17455
17456    #[simd_test(enable = "avx512bw,avx512vl")]
17457    unsafe fn test_mm256_mask_sra_epi16() {
17458        let a = _mm256_set1_epi16(8);
17459        let count = _mm_set1_epi16(1);
17460        let r = _mm256_mask_sra_epi16(a, 0, a, count);
17461        assert_eq_m256i(r, a);
17462        let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
17463        let e = _mm256_set1_epi16(0);
17464        assert_eq_m256i(r, e);
17465    }
17466
17467    #[simd_test(enable = "avx512bw,avx512vl")]
17468    unsafe fn test_mm256_maskz_sra_epi16() {
17469        let a = _mm256_set1_epi16(8);
17470        let count = _mm_set1_epi16(1);
17471        let r = _mm256_maskz_sra_epi16(0, a, count);
17472        assert_eq_m256i(r, _mm256_setzero_si256());
17473        let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
17474        let e = _mm256_set1_epi16(0);
17475        assert_eq_m256i(r, e);
17476    }
17477
17478    #[simd_test(enable = "avx512bw,avx512vl")]
17479    unsafe fn test_mm_mask_sra_epi16() {
17480        let a = _mm_set1_epi16(8);
17481        let count = _mm_set1_epi16(1);
17482        let r = _mm_mask_sra_epi16(a, 0, a, count);
17483        assert_eq_m128i(r, a);
17484        let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
17485        let e = _mm_set1_epi16(0);
17486        assert_eq_m128i(r, e);
17487    }
17488
17489    #[simd_test(enable = "avx512bw,avx512vl")]
17490    unsafe fn test_mm_maskz_sra_epi16() {
17491        let a = _mm_set1_epi16(8);
17492        let count = _mm_set1_epi16(1);
17493        let r = _mm_maskz_sra_epi16(0, a, count);
17494        assert_eq_m128i(r, _mm_setzero_si128());
17495        let r = _mm_maskz_sra_epi16(0b11111111, a, count);
17496        let e = _mm_set1_epi16(0);
17497        assert_eq_m128i(r, e);
17498    }
17499
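    // srai: immediate arithmetic right shift; 8 >> 2 == 2.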
17500    #[simd_test(enable = "avx512bw")]
17501    unsafe fn test_mm512_srai_epi16() {
17502        let a = _mm512_set1_epi16(8);
17503        let r = _mm512_srai_epi16::<2>(a);
17504        let e = _mm512_set1_epi16(2);
17505        assert_eq_m512i(r, e);
17506    }
17507
17508    #[simd_test(enable = "avx512bw")]
17509    unsafe fn test_mm512_mask_srai_epi16() {
17510        let a = _mm512_set1_epi16(8);
17511        let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
17512        assert_eq_m512i(r, a);
17513        let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
17514        let e = _mm512_set1_epi16(2);
17515        assert_eq_m512i(r, e);
17516    }
17517
17518    #[simd_test(enable = "avx512bw")]
17519    unsafe fn test_mm512_maskz_srai_epi16() {
17520        let a = _mm512_set1_epi16(8);
17521        let r = _mm512_maskz_srai_epi16::<2>(0, a);
17522        assert_eq_m512i(r, _mm512_setzero_si512());
17523        let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
17524        let e = _mm512_set1_epi16(2);
17525        assert_eq_m512i(r, e);
17526    }
17527
17528    #[simd_test(enable = "avx512bw,avx512vl")]
17529    unsafe fn test_mm256_mask_srai_epi16() {
17530        let a = _mm256_set1_epi16(8);
17531        let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
17532        assert_eq_m256i(r, a);
17533        let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
17534        let e = _mm256_set1_epi16(2);
17535        assert_eq_m256i(r, e);
17536    }
17537
17538    #[simd_test(enable = "avx512bw,avx512vl")]
17539    unsafe fn test_mm256_maskz_srai_epi16() {
17540        let a = _mm256_set1_epi16(8);
17541        let r = _mm256_maskz_srai_epi16::<2>(0, a);
17542        assert_eq_m256i(r, _mm256_setzero_si256());
17543        let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
17544        let e = _mm256_set1_epi16(2);
17545        assert_eq_m256i(r, e);
17546    }
17547
17548    #[simd_test(enable = "avx512bw,avx512vl")]
17549    unsafe fn test_mm_mask_srai_epi16() {
17550        let a = _mm_set1_epi16(8);
17551        let r = _mm_mask_srai_epi16::<2>(a, 0, a);
17552        assert_eq_m128i(r, a);
17553        let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
17554        let e = _mm_set1_epi16(2);
17555        assert_eq_m128i(r, e);
17556    }
17557
17558    #[simd_test(enable = "avx512bw,avx512vl")]
17559    unsafe fn test_mm_maskz_srai_epi16() {
17560        let a = _mm_set1_epi16(8);
17561        let r = _mm_maskz_srai_epi16::<2>(0, a);
17562        assert_eq_m128i(r, _mm_setzero_si128());
17563        let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
17564        let e = _mm_set1_epi16(2);
17565        assert_eq_m128i(r, e);
17566    }
17567
17568    #[simd_test(enable = "avx512bw")]
17569    unsafe fn test_mm512_srav_epi16() {
17570        let a = _mm512_set1_epi16(8);
17571        let count = _mm512_set1_epi16(2);
17572        let r = _mm512_srav_epi16(a, count);
17573        let e = _mm512_set1_epi16(2);
17574        assert_eq_m512i(r, e);
17575    }
17576
17577    #[simd_test(enable = "avx512bw")]
17578    unsafe fn test_mm512_mask_srav_epi16() {
17579        let a = _mm512_set1_epi16(8);
17580        let count = _mm512_set1_epi16(2);
17581        let r = _mm512_mask_srav_epi16(a, 0, a, count);
17582        assert_eq_m512i(r, a);
17583        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
17584        let e = _mm512_set1_epi16(2);
17585        assert_eq_m512i(r, e);
17586    }
17587
17588    #[simd_test(enable = "avx512bw")]
17589    unsafe fn test_mm512_maskz_srav_epi16() {
17590        let a = _mm512_set1_epi16(8);
17591        let count = _mm512_set1_epi16(2);
17592        let r = _mm512_maskz_srav_epi16(0, a, count);
17593        assert_eq_m512i(r, _mm512_setzero_si512());
17594        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
17595        let e = _mm512_set1_epi16(2);
17596        assert_eq_m512i(r, e);
17597    }
17598
17599    #[simd_test(enable = "avx512bw,avx512vl")]
17600    unsafe fn test_mm256_srav_epi16() {
17601        let a = _mm256_set1_epi16(8);
17602        let count = _mm256_set1_epi16(2);
17603        let r = _mm256_srav_epi16(a, count);
17604        let e = _mm256_set1_epi16(2);
17605        assert_eq_m256i(r, e);
17606    }
17607
17608    #[simd_test(enable = "avx512bw,avx512vl")]
17609    unsafe fn test_mm256_mask_srav_epi16() {
17610        let a = _mm256_set1_epi16(8);
17611        let count = _mm256_set1_epi16(2);
17612        let r = _mm256_mask_srav_epi16(a, 0, a, count);
17613        assert_eq_m256i(r, a);
17614        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
17615        let e = _mm256_set1_epi16(2);
17616        assert_eq_m256i(r, e);
17617    }
17618
17619    #[simd_test(enable = "avx512bw,avx512vl")]
17620    unsafe fn test_mm256_maskz_srav_epi16() {
17621        let a = _mm256_set1_epi16(8);
17622        let count = _mm256_set1_epi16(2);
17623        let r = _mm256_maskz_srav_epi16(0, a, count);
17624        assert_eq_m256i(r, _mm256_setzero_si256());
17625        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
17626        let e = _mm256_set1_epi16(2);
17627        assert_eq_m256i(r, e);
17628    }
17629
17630    #[simd_test(enable = "avx512bw,avx512vl")]
17631    unsafe fn test_mm_srav_epi16() {
17632        let a = _mm_set1_epi16(8);
17633        let count = _mm_set1_epi16(2);
17634        let r = _mm_srav_epi16(a, count);
17635        let e = _mm_set1_epi16(2);
17636        assert_eq_m128i(r, e);
17637    }
17638
17639    #[simd_test(enable = "avx512bw,avx512vl")]
17640    unsafe fn test_mm_mask_srav_epi16() {
17641        let a = _mm_set1_epi16(8);
17642        let count = _mm_set1_epi16(2);
17643        let r = _mm_mask_srav_epi16(a, 0, a, count);
17644        assert_eq_m128i(r, a);
17645        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
17646        let e = _mm_set1_epi16(2);
17647        assert_eq_m128i(r, e);
17648    }
17649
17650    #[simd_test(enable = "avx512bw,avx512vl")]
17651    unsafe fn test_mm_maskz_srav_epi16() {
17652        let a = _mm_set1_epi16(8);
17653        let count = _mm_set1_epi16(2);
17654        let r = _mm_maskz_srav_epi16(0, a, count);
17655        assert_eq_m128i(r, _mm_setzero_si128());
17656        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
17657        let e = _mm_set1_epi16(2);
17658        assert_eq_m128i(r, e);
17659    }
17660
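    // In the 32-lane epi16 permutex2var, bits [4:0] of each idx element select a
    // source lane and bit 5 selects the source vector (clear = a, set = b), so
    // every `1<<5` entry below pulls lane 0 of b, i.e. the constant 100.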
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

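    // The mask2 variants copy elements from idx (not from a) wherever the mask
    // bit is clear, which is why an all-zero mask returns idx unchanged.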
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

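    // permutexvar is a full-width gather: every output lane reads the lane of `a`
    // named by the corresponding idx element. `_mm512_set_epi16` lists lanes from
    // highest to lowest, so lane 1 of `a` below holds the value 30.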
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_permutexvar_epi16(idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

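    // mask_blend picks lane i from b when mask bit i is set and from a when it is
    // clear; the least-significant mask bit controls the lowest lane.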
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_blend_epi16(0b11110000, a, b);
        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

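    // broadcastw/broadcastb replicate the lowest element of `a` across the whole
    // destination; with `_mm_set_epi16`/`_mm_set_epi8` the lowest element is the
    // last argument (24 and 32 in the tests below).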
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_broadcastw_epi16(a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastw_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_broadcastw_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastw_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_broadcastw_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastw_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_maskz_broadcastw_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_broadcastb_epi8(a);
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastb_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_maskz_broadcastb_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastb_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_maskz_broadcastb_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

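    // unpackhi interleaves the high half of each 128-bit lane independently, so
    // the 512-bit result is four separate 128-bit interleaves rather than one
    // spanning the full register.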
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

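    // unpacklo performs the same per-128-bit-lane interleave as unpackhi, but on
    // the low half of each lane.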
18405    #[simd_test(enable = "avx512bw")]
18406    unsafe fn test_mm512_unpacklo_epi16() {
18407        #[rustfmt::skip]
18408        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18409                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18410        #[rustfmt::skip]
18411        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18412                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18413        let r = _mm512_unpacklo_epi16(a, b);
18414        #[rustfmt::skip]
18415        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18416                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18417        assert_eq_m512i(r, e);
18418    }
18419
18420    #[simd_test(enable = "avx512bw")]
18421    unsafe fn test_mm512_mask_unpacklo_epi16() {
18422        #[rustfmt::skip]
18423        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18424                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18425        #[rustfmt::skip]
18426        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18427                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18428        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
18429        assert_eq_m512i(r, a);
18430        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
18431        #[rustfmt::skip]
18432        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18433                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18434        assert_eq_m512i(r, e);
18435    }
18436
18437    #[simd_test(enable = "avx512bw")]
18438    unsafe fn test_mm512_maskz_unpacklo_epi16() {
18439        #[rustfmt::skip]
18440        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18441                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18442        #[rustfmt::skip]
18443        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18444                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18445        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
18446        assert_eq_m512i(r, _mm512_setzero_si512());
18447        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
18448        #[rustfmt::skip]
18449        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
18450                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
18451        assert_eq_m512i(r, e);
18452    }
18453
18454    #[simd_test(enable = "avx512bw,avx512vl")]
18455    unsafe fn test_mm256_mask_unpacklo_epi16() {
18456        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18457        let b = _mm256_set_epi16(
18458            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18459        );
18460        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
18461        assert_eq_m256i(r, a);
18462        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
18463        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18464        assert_eq_m256i(r, e);
18465    }
18466
18467    #[simd_test(enable = "avx512bw,avx512vl")]
18468    unsafe fn test_mm256_maskz_unpacklo_epi16() {
18469        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18470        let b = _mm256_set_epi16(
18471            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18472        );
18473        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
18474        assert_eq_m256i(r, _mm256_setzero_si256());
18475        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
18476        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
18477        assert_eq_m256i(r, e);
18478    }
18479
18480    #[simd_test(enable = "avx512bw,avx512vl")]
18481    unsafe fn test_mm_mask_unpacklo_epi16() {
18482        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18483        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18484        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
18485        assert_eq_m128i(r, a);
18486        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
18487        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18488        assert_eq_m128i(r, e);
18489    }
18490
18491    #[simd_test(enable = "avx512bw,avx512vl")]
18492    unsafe fn test_mm_maskz_unpacklo_epi16() {
18493        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
18494        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
18495        let r = _mm_maskz_unpacklo_epi16(0, a, b);
18496        assert_eq_m128i(r, _mm_setzero_si128());
18497        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
18498        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
18499        assert_eq_m128i(r, e);
18500    }
18501
18502    #[simd_test(enable = "avx512bw")]
18503    unsafe fn test_mm512_unpacklo_epi8() {
18504        #[rustfmt::skip]
18505        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18506                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18507                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18508                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18509        #[rustfmt::skip]
18510        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18511                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18512                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18513                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18514        let r = _mm512_unpacklo_epi8(a, b);
18515        #[rustfmt::skip]
18516        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18517                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18518                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18519                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18520        assert_eq_m512i(r, e);
18521    }
18522
18523    #[simd_test(enable = "avx512bw")]
18524    unsafe fn test_mm512_mask_unpacklo_epi8() {
18525        #[rustfmt::skip]
18526        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18527                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18528                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18529                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18530        #[rustfmt::skip]
18531        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18532                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18533                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18534                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18535        let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
18536        assert_eq_m512i(r, a);
18537        let r = _mm512_mask_unpacklo_epi8(
18538            a,
18539            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18540            a,
18541            b,
18542        );
18543        #[rustfmt::skip]
18544        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18545                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18546                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18547                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18548        assert_eq_m512i(r, e);
18549    }
18550
18551    #[simd_test(enable = "avx512bw")]
18552    unsafe fn test_mm512_maskz_unpacklo_epi8() {
18553        #[rustfmt::skip]
18554        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18555                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18556                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18557                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
18558        #[rustfmt::skip]
18559        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18560                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
18561                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
18562                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
18563        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
18564        assert_eq_m512i(r, _mm512_setzero_si512());
18565        let r = _mm512_maskz_unpacklo_epi8(
18566            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18567            a,
18568            b,
18569        );
18570        #[rustfmt::skip]
18571        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18572                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
18573                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
18574                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
18575        assert_eq_m512i(r, e);
18576    }
18577
18578    #[simd_test(enable = "avx512bw,avx512vl")]
18579    unsafe fn test_mm256_mask_unpacklo_epi8() {
18580        #[rustfmt::skip]
18581        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18582                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18583        #[rustfmt::skip]
18584        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18585                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
18586        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
18587        assert_eq_m256i(r, a);
18588        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
18589        #[rustfmt::skip]
18590        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18591                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
18592        assert_eq_m256i(r, e);
18593    }
18594
18595    #[simd_test(enable = "avx512bw,avx512vl")]
18596    unsafe fn test_mm256_maskz_unpacklo_epi8() {
18597        #[rustfmt::skip]
18598        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
18599                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
18600        #[rustfmt::skip]
18601        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
18602                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
18603        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
18604        assert_eq_m256i(r, _mm256_setzero_si256());
18605        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
18606        #[rustfmt::skip]
18607        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
18608                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
18609        assert_eq_m256i(r, e);
18610    }
18611
18612    #[simd_test(enable = "avx512bw,avx512vl")]
18613    unsafe fn test_mm_mask_unpacklo_epi8() {
18614        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18615        let b = _mm_set_epi8(
18616            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
18617        );
18618        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
18619        assert_eq_m128i(r, a);
18620        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
18621        let e = _mm_set_epi8(
18622            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
18623        );
18624        assert_eq_m128i(r, e);
18625    }
18626
18627    #[simd_test(enable = "avx512bw,avx512vl")]
18628    unsafe fn test_mm_maskz_unpacklo_epi8() {
18629        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
18630        let b = _mm_set_epi8(
18631            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
18632        );
18633        let r = _mm_maskz_unpacklo_epi8(0, a, b);
18634        assert_eq_m128i(r, _mm_setzero_si128());
18635        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
18636        let e = _mm_set_epi8(
18637            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
18638        );
18639        assert_eq_m128i(r, e);
18640    }
18641
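    // mask_mov/maskz_mov perform no computation: element i of the result is
    // a[i] where mask bit i is set, and src[i] (or zero for maskz) otherwise.
    // A minimal scalar sketch of that blend rule, for reference only (the
    // helper name and shape are illustrative, not part of the API):
    #[allow(dead_code)]
    fn mask_blend_i16x32(k: u32, src: [i16; 32], a: [i16; 32]) -> [i16; 32] {
        let mut out = src;
        for i in 0..32 {
            // Take a[i] exactly where the corresponding mask bit is set.
            if (k >> i) & 1 == 1 {
                out[i] = a[i];
            }
        }
        out
    }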
18642    #[simd_test(enable = "avx512bw")]
18643    unsafe fn test_mm512_mask_mov_epi16() {
18644        let src = _mm512_set1_epi16(1);
18645        let a = _mm512_set1_epi16(2);
18646        let r = _mm512_mask_mov_epi16(src, 0, a);
18647        assert_eq_m512i(r, src);
18648        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
18649        assert_eq_m512i(r, a);
18650    }
18651
18652    #[simd_test(enable = "avx512bw")]
18653    unsafe fn test_mm512_maskz_mov_epi16() {
18654        let a = _mm512_set1_epi16(2);
18655        let r = _mm512_maskz_mov_epi16(0, a);
18656        assert_eq_m512i(r, _mm512_setzero_si512());
18657        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
18658        assert_eq_m512i(r, a);
18659    }
18660
18661    #[simd_test(enable = "avx512bw,avx512vl")]
18662    unsafe fn test_mm256_mask_mov_epi16() {
18663        let src = _mm256_set1_epi16(1);
18664        let a = _mm256_set1_epi16(2);
18665        let r = _mm256_mask_mov_epi16(src, 0, a);
18666        assert_eq_m256i(r, src);
18667        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
18668        assert_eq_m256i(r, a);
18669    }
18670
18671    #[simd_test(enable = "avx512bw,avx512vl")]
18672    unsafe fn test_mm256_maskz_mov_epi16() {
18673        let a = _mm256_set1_epi16(2);
18674        let r = _mm256_maskz_mov_epi16(0, a);
18675        assert_eq_m256i(r, _mm256_setzero_si256());
18676        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
18677        assert_eq_m256i(r, a);
18678    }
18679
18680    #[simd_test(enable = "avx512bw,avx512vl")]
18681    unsafe fn test_mm_mask_mov_epi16() {
18682        let src = _mm_set1_epi16(1);
18683        let a = _mm_set1_epi16(2);
18684        let r = _mm_mask_mov_epi16(src, 0, a);
18685        assert_eq_m128i(r, src);
18686        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
18687        assert_eq_m128i(r, a);
18688    }
18689
18690    #[simd_test(enable = "avx512bw,avx512vl")]
18691    unsafe fn test_mm_maskz_mov_epi16() {
18692        let a = _mm_set1_epi16(2);
18693        let r = _mm_maskz_mov_epi16(0, a);
18694        assert_eq_m128i(r, _mm_setzero_si128());
18695        let r = _mm_maskz_mov_epi16(0b11111111, a);
18696        assert_eq_m128i(r, a);
18697    }
18698
18699    #[simd_test(enable = "avx512bw")]
18700    unsafe fn test_mm512_mask_mov_epi8() {
18701        let src = _mm512_set1_epi8(1);
18702        let a = _mm512_set1_epi8(2);
18703        let r = _mm512_mask_mov_epi8(src, 0, a);
18704        assert_eq_m512i(r, src);
18705        let r = _mm512_mask_mov_epi8(
18706            src,
18707            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18708            a,
18709        );
18710        assert_eq_m512i(r, a);
18711    }
18712
18713    #[simd_test(enable = "avx512bw")]
18714    unsafe fn test_mm512_maskz_mov_epi8() {
18715        let a = _mm512_set1_epi8(2);
18716        let r = _mm512_maskz_mov_epi8(0, a);
18717        assert_eq_m512i(r, _mm512_setzero_si512());
18718        let r = _mm512_maskz_mov_epi8(
18719            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18720            a,
18721        );
18722        assert_eq_m512i(r, a);
18723    }
18724
18725    #[simd_test(enable = "avx512bw,avx512vl")]
18726    unsafe fn test_mm256_mask_mov_epi8() {
18727        let src = _mm256_set1_epi8(1);
18728        let a = _mm256_set1_epi8(2);
18729        let r = _mm256_mask_mov_epi8(src, 0, a);
18730        assert_eq_m256i(r, src);
18731        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
18732        assert_eq_m256i(r, a);
18733    }
18734
18735    #[simd_test(enable = "avx512bw,avx512vl")]
18736    unsafe fn test_mm256_maskz_mov_epi8() {
18737        let a = _mm256_set1_epi8(2);
18738        let r = _mm256_maskz_mov_epi8(0, a);
18739        assert_eq_m256i(r, _mm256_setzero_si256());
18740        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
18741        assert_eq_m256i(r, a);
18742    }
18743
18744    #[simd_test(enable = "avx512bw,avx512vl")]
18745    unsafe fn test_mm_mask_mov_epi8() {
18746        let src = _mm_set1_epi8(1);
18747        let a = _mm_set1_epi8(2);
18748        let r = _mm_mask_mov_epi8(src, 0, a);
18749        assert_eq_m128i(r, src);
18750        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
18751        assert_eq_m128i(r, a);
18752    }
18753
18754    #[simd_test(enable = "avx512bw,avx512vl")]
18755    unsafe fn test_mm_maskz_mov_epi8() {
18756        let a = _mm_set1_epi8(2);
18757        let r = _mm_maskz_mov_epi8(0, a);
18758        assert_eq_m128i(r, _mm_setzero_si128());
18759        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
18760        assert_eq_m128i(r, a);
18761    }
18762
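    // mask_set1/maskz_set1 broadcast the scalar into the selected elements
    // only; unselected elements keep `src` (mask) or become zero (maskz).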
18763    #[simd_test(enable = "avx512bw")]
18764    unsafe fn test_mm512_mask_set1_epi16() {
18765        let src = _mm512_set1_epi16(2);
18766        let a: i16 = 11;
18767        let r = _mm512_mask_set1_epi16(src, 0, a);
18768        assert_eq_m512i(r, src);
18769        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
18770        let e = _mm512_set1_epi16(11);
18771        assert_eq_m512i(r, e);
18772    }
18773
18774    #[simd_test(enable = "avx512bw")]
18775    unsafe fn test_mm512_maskz_set1_epi16() {
18776        let a: i16 = 11;
18777        let r = _mm512_maskz_set1_epi16(0, a);
18778        assert_eq_m512i(r, _mm512_setzero_si512());
18779        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
18780        let e = _mm512_set1_epi16(11);
18781        assert_eq_m512i(r, e);
18782    }
18783
18784    #[simd_test(enable = "avx512bw,avx512vl")]
18785    unsafe fn test_mm256_mask_set1_epi16() {
18786        let src = _mm256_set1_epi16(2);
18787        let a: i16 = 11;
18788        let r = _mm256_mask_set1_epi16(src, 0, a);
18789        assert_eq_m256i(r, src);
18790        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
18791        let e = _mm256_set1_epi16(11);
18792        assert_eq_m256i(r, e);
18793    }
18794
18795    #[simd_test(enable = "avx512bw,avx512vl")]
18796    unsafe fn test_mm256_maskz_set1_epi16() {
18797        let a: i16 = 11;
18798        let r = _mm256_maskz_set1_epi16(0, a);
18799        assert_eq_m256i(r, _mm256_setzero_si256());
18800        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
18801        let e = _mm256_set1_epi16(11);
18802        assert_eq_m256i(r, e);
18803    }
18804
18805    #[simd_test(enable = "avx512bw,avx512vl")]
18806    unsafe fn test_mm_mask_set1_epi16() {
18807        let src = _mm_set1_epi16(2);
18808        let a: i16 = 11;
18809        let r = _mm_mask_set1_epi16(src, 0, a);
18810        assert_eq_m128i(r, src);
18811        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
18812        let e = _mm_set1_epi16(11);
18813        assert_eq_m128i(r, e);
18814    }
18815
18816    #[simd_test(enable = "avx512bw,avx512vl")]
18817    unsafe fn test_mm_maskz_set1_epi16() {
18818        let a: i16 = 11;
18819        let r = _mm_maskz_set1_epi16(0, a);
18820        assert_eq_m128i(r, _mm_setzero_si128());
18821        let r = _mm_maskz_set1_epi16(0b11111111, a);
18822        let e = _mm_set1_epi16(11);
18823        assert_eq_m128i(r, e);
18824    }
18825
18826    #[simd_test(enable = "avx512bw")]
18827    unsafe fn test_mm512_mask_set1_epi8() {
18828        let src = _mm512_set1_epi8(2);
18829        let a: i8 = 11;
18830        let r = _mm512_mask_set1_epi8(src, 0, a);
18831        assert_eq_m512i(r, src);
18832        let r = _mm512_mask_set1_epi8(
18833            src,
18834            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18835            a,
18836        );
18837        let e = _mm512_set1_epi8(11);
18838        assert_eq_m512i(r, e);
18839    }
18840
18841    #[simd_test(enable = "avx512bw")]
18842    unsafe fn test_mm512_maskz_set1_epi8() {
18843        let a: i8 = 11;
18844        let r = _mm512_maskz_set1_epi8(0, a);
18845        assert_eq_m512i(r, _mm512_setzero_si512());
18846        let r = _mm512_maskz_set1_epi8(
18847            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18848            a,
18849        );
18850        let e = _mm512_set1_epi8(11);
18851        assert_eq_m512i(r, e);
18852    }
18853
18854    #[simd_test(enable = "avx512bw,avx512vl")]
18855    unsafe fn test_mm256_mask_set1_epi8() {
18856        let src = _mm256_set1_epi8(2);
18857        let a: i8 = 11;
18858        let r = _mm256_mask_set1_epi8(src, 0, a);
18859        assert_eq_m256i(r, src);
18860        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
18861        let e = _mm256_set1_epi8(11);
18862        assert_eq_m256i(r, e);
18863    }
18864
18865    #[simd_test(enable = "avx512bw,avx512vl")]
18866    unsafe fn test_mm256_maskz_set1_epi8() {
18867        let a: i8 = 11;
18868        let r = _mm256_maskz_set1_epi8(0, a);
18869        assert_eq_m256i(r, _mm256_setzero_si256());
18870        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
18871        let e = _mm256_set1_epi8(11);
18872        assert_eq_m256i(r, e);
18873    }
18874
18875    #[simd_test(enable = "avx512bw,avx512vl")]
18876    unsafe fn test_mm_mask_set1_epi8() {
18877        let src = _mm_set1_epi8(2);
18878        let a: i8 = 11;
18879        let r = _mm_mask_set1_epi8(src, 0, a);
18880        assert_eq_m128i(r, src);
18881        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
18882        let e = _mm_set1_epi8(11);
18883        assert_eq_m128i(r, e);
18884    }
18885
18886    #[simd_test(enable = "avx512bw,avx512vl")]
18887    unsafe fn test_mm_maskz_set1_epi8() {
18888        let a: i8 = 11;
18889        let r = _mm_maskz_set1_epi8(0, a);
18890        assert_eq_m128i(r, _mm_setzero_si128());
18891        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
18892        let e = _mm_set1_epi8(11);
18893        assert_eq_m128i(r, e);
18894    }
18895
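    // shufflelo_epi16 rearranges only the low four words of each 128-bit
    // lane; the high four pass through. The immediate packs four 2-bit
    // source indices, LSB-first, so 0b00_01_01_11 means
    // dst.w[0..4] = src.w[3], src.w[1], src.w[1], src.w[0].
    // A scalar sketch of the per-lane rule, for reference only (the helper
    // name is illustrative):
    #[allow(dead_code)]
    fn shufflelo_words(imm8: u8, lane: [i16; 8]) -> [i16; 8] {
        let mut out = lane;
        for i in 0..4 {
            // Each 2-bit field of the immediate selects one of the low four
            // source words; words 4..=7 are left untouched.
            out[i] = lane[usize::from((imm8 >> (2 * i)) & 0b11)];
        }
        out
    }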
18896    #[simd_test(enable = "avx512bw")]
18897    unsafe fn test_mm512_shufflelo_epi16() {
18898        #[rustfmt::skip]
18899        let a = _mm512_set_epi16(
18900            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
18901            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
18902        );
18903        #[rustfmt::skip]
18904        let e = _mm512_set_epi16(
18905            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
18906            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
18907        );
18908        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
18909        assert_eq_m512i(r, e);
18910    }
18911
18912    #[simd_test(enable = "avx512bw")]
18913    unsafe fn test_mm512_mask_shufflelo_epi16() {
18914        #[rustfmt::skip]
18915        let a = _mm512_set_epi16(
18916            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
18917            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
18918        );
18919        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
18920        assert_eq_m512i(r, a);
18921        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
18922            a,
18923            0b11111111_11111111_11111111_11111111,
18924            a,
18925        );
18926        #[rustfmt::skip]
18927        let e = _mm512_set_epi16(
18928            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
18929            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
18930        );
18931        assert_eq_m512i(r, e);
18932    }
18933
18934    #[simd_test(enable = "avx512bw")]
18935    unsafe fn test_mm512_maskz_shufflelo_epi16() {
18936        #[rustfmt::skip]
18937        let a = _mm512_set_epi16(
18938            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
18939            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
18940        );
18941        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
18942        assert_eq_m512i(r, _mm512_setzero_si512());
18943        let r =
18944            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
18945        #[rustfmt::skip]
18946        let e = _mm512_set_epi16(
18947            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
18948            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
18949        );
18950        assert_eq_m512i(r, e);
18951    }
18952
18953    #[simd_test(enable = "avx512bw,avx512vl")]
18954    unsafe fn test_mm256_mask_shufflelo_epi16() {
18955        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18956        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
18957        assert_eq_m256i(r, a);
18958        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
18959        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
18960        assert_eq_m256i(r, e);
18961    }
18962
18963    #[simd_test(enable = "avx512bw,avx512vl")]
18964    unsafe fn test_mm256_maskz_shufflelo_epi16() {
18965        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18966        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
18967        assert_eq_m256i(r, _mm256_setzero_si256());
18968        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
18969        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
18970        assert_eq_m256i(r, e);
18971    }
18972
18973    #[simd_test(enable = "avx512bw,avx512vl")]
18974    unsafe fn test_mm_mask_shufflelo_epi16() {
18975        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18976        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
18977        assert_eq_m128i(r, a);
18978        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
18979        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
18980        assert_eq_m128i(r, e);
18981    }
18982
18983    #[simd_test(enable = "avx512bw,avx512vl")]
18984    unsafe fn test_mm_maskz_shufflelo_epi16() {
18985        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
18986        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
18987        assert_eq_m128i(r, _mm_setzero_si128());
18988        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
18989        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
18990        assert_eq_m128i(r, e);
18991    }
18992
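    // shufflehi_epi16 applies the same 2-bit index scheme to the high four
    // words of each 128-bit lane (indices select among words 4..=7); the low
    // four words pass through unchanged.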
18993    #[simd_test(enable = "avx512bw")]
18994    unsafe fn test_mm512_shufflehi_epi16() {
18995        #[rustfmt::skip]
18996        let a = _mm512_set_epi16(
18997            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
18998            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
18999        );
19000        #[rustfmt::skip]
19001        let e = _mm512_set_epi16(
19002            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
19003            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
19004        );
19005        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
19006        assert_eq_m512i(r, e);
19007    }
19008
19009    #[simd_test(enable = "avx512bw")]
19010    unsafe fn test_mm512_mask_shufflehi_epi16() {
19011        #[rustfmt::skip]
19012        let a = _mm512_set_epi16(
19013            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
19014            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
19015        );
19016        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
19017        assert_eq_m512i(r, a);
19018        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
19019            a,
19020            0b11111111_11111111_11111111_11111111,
19021            a,
19022        );
19023        #[rustfmt::skip]
19024        let e = _mm512_set_epi16(
19025            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
19026            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
19027        );
19028        assert_eq_m512i(r, e);
19029    }
19030
19031    #[simd_test(enable = "avx512bw")]
19032    unsafe fn test_mm512_maskz_shufflehi_epi16() {
19033        #[rustfmt::skip]
19034        let a = _mm512_set_epi16(
19035            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
19036            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
19037        );
19038        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
19039        assert_eq_m512i(r, _mm512_setzero_si512());
19040        let r =
19041            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
19042        #[rustfmt::skip]
19043        let e = _mm512_set_epi16(
19044            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
19045            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
19046        );
19047        assert_eq_m512i(r, e);
19048    }
19049
19050    #[simd_test(enable = "avx512bw,avx512vl")]
19051    unsafe fn test_mm256_mask_shufflehi_epi16() {
19052        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19053        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
19054        assert_eq_m256i(r, a);
19055        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
19056        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
19057        assert_eq_m256i(r, e);
19058    }
19059
19060    #[simd_test(enable = "avx512bw,avx512vl")]
19061    unsafe fn test_mm256_maskz_shufflehi_epi16() {
19062        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19063        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
19064        assert_eq_m256i(r, _mm256_setzero_si256());
19065        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
19066        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
19067        assert_eq_m256i(r, e);
19068    }
19069
19070    #[simd_test(enable = "avx512bw,avx512vl")]
19071    unsafe fn test_mm_mask_shufflehi_epi16() {
19072        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19073        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
19074        assert_eq_m128i(r, a);
19075        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
19076        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
19077        assert_eq_m128i(r, e);
19078    }
19079
19080    #[simd_test(enable = "avx512bw,avx512vl")]
19081    unsafe fn test_mm_maskz_shufflehi_epi16() {
19082        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
19083        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
19084        assert_eq_m128i(r, _mm_setzero_si128());
19085        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
19086        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
19087        assert_eq_m128i(r, e);
19088    }
19089
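    // shuffle_epi8 (vpshufb) works per 128-bit lane: each result byte takes
    // the low four bits of the corresponding byte of `b` as an index into
    // the lane's sixteen bytes of `a` (a set top bit in `b` would zero the
    // byte instead). With b = set1(1), every byte selects lane byte 1, which
    // in the descending layout below is 62, 46, 30 and 14 per lane.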
19090    #[simd_test(enable = "avx512bw")]
19091    unsafe fn test_mm512_shuffle_epi8() {
19092        #[rustfmt::skip]
19093        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19094                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
19095                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
19096                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
19097        let b = _mm512_set1_epi8(1);
19098        let r = _mm512_shuffle_epi8(a, b);
19099        #[rustfmt::skip]
19100        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19101                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
19102                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
19103                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
19104        assert_eq_m512i(r, e);
19105    }
19106
19107    #[simd_test(enable = "avx512bw")]
19108    unsafe fn test_mm512_mask_shuffle_epi8() {
19109        #[rustfmt::skip]
19110        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19111                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
19112                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
19113                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
19114        let b = _mm512_set1_epi8(1);
19115        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
19116        assert_eq_m512i(r, a);
19117        let r = _mm512_mask_shuffle_epi8(
19118            a,
19119            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19120            a,
19121            b,
19122        );
19123        #[rustfmt::skip]
19124        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19125                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
19126                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
19127                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
19128        assert_eq_m512i(r, e);
19129    }
19130
19131    #[simd_test(enable = "avx512bw")]
19132    unsafe fn test_mm512_maskz_shuffle_epi8() {
19133        #[rustfmt::skip]
19134        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19135                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
19136                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
19137                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
19138        let b = _mm512_set1_epi8(1);
19139        let r = _mm512_maskz_shuffle_epi8(0, a, b);
19140        assert_eq_m512i(r, _mm512_setzero_si512());
19141        let r = _mm512_maskz_shuffle_epi8(
19142            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19143            a,
19144            b,
19145        );
19146        #[rustfmt::skip]
19147        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19148                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
19149                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
19150                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
19151        assert_eq_m512i(r, e);
19152    }
19153
19154    #[simd_test(enable = "avx512bw,avx512vl")]
19155    unsafe fn test_mm256_mask_shuffle_epi8() {
19156        #[rustfmt::skip]
19157        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19158                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
19159        let b = _mm256_set1_epi8(1);
19160        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
19161        assert_eq_m256i(r, a);
19162        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
19163        #[rustfmt::skip]
19164        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19165                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
19166        assert_eq_m256i(r, e);
19167    }
19168
19169    #[simd_test(enable = "avx512bw,avx512vl")]
19170    unsafe fn test_mm256_maskz_shuffle_epi8() {
19171        #[rustfmt::skip]
19172        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
19173                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
19174        let b = _mm256_set1_epi8(1);
19175        let r = _mm256_maskz_shuffle_epi8(0, a, b);
19176        assert_eq_m256i(r, _mm256_setzero_si256());
19177        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
19178        #[rustfmt::skip]
19179        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19180                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
19181        assert_eq_m256i(r, e);
19182    }
19183
19184    #[simd_test(enable = "avx512bw,avx512vl")]
19185    unsafe fn test_mm_mask_shuffle_epi8() {
19186        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
19187        let b = _mm_set1_epi8(1);
19188        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
19189        assert_eq_m128i(r, a);
19190        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
19191        let e = _mm_set_epi8(
19192            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19193        );
19194        assert_eq_m128i(r, e);
19195    }
19196
19197    #[simd_test(enable = "avx512bw,avx512vl")]
19198    unsafe fn test_mm_maskz_shuffle_epi8() {
19199        #[rustfmt::skip]
19200        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
19201        let b = _mm_set1_epi8(1);
19202        let r = _mm_maskz_shuffle_epi8(0, a, b);
19203        assert_eq_m128i(r, _mm_setzero_si128());
19204        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
19205        let e = _mm_set_epi8(
19206            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
19207        );
19208        assert_eq_m128i(r, e);
19209    }
19210
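    // test_epi16_mask computes a bitwise AND and sets mask bit i iff
    // (a[i] & b[i]) != 0; here every element shares bit 0, so the result is
    // all ones.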
19211    #[simd_test(enable = "avx512bw")]
19212    unsafe fn test_mm512_test_epi16_mask() {
19213        let a = _mm512_set1_epi16(1 << 0);
19214        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
19215        let r = _mm512_test_epi16_mask(a, b);
19216        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19217        assert_eq!(r, e);
19218    }
19219
19220    #[simd_test(enable = "avx512bw")]
19221    unsafe fn test_mm512_mask_test_epi16_mask() {
19222        let a = _mm512_set1_epi16(1 << 0);
19223        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
19224        let r = _mm512_mask_test_epi16_mask(0, a, b);
19225        assert_eq!(r, 0);
19226        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
19227        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19228        assert_eq!(r, e);
19229    }
19230
19231    #[simd_test(enable = "avx512bw,avx512vl")]
19232    unsafe fn test_mm256_test_epi16_mask() {
19233        let a = _mm256_set1_epi16(1 << 0);
19234        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
19235        let r = _mm256_test_epi16_mask(a, b);
19236        let e: __mmask16 = 0b11111111_11111111;
19237        assert_eq!(r, e);
19238    }
19239
19240    #[simd_test(enable = "avx512bw,avx512vl")]
19241    unsafe fn test_mm256_mask_test_epi16_mask() {
19242        let a = _mm256_set1_epi16(1 << 0);
19243        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
19244        let r = _mm256_mask_test_epi16_mask(0, a, b);
19245        assert_eq!(r, 0);
19246        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
19247        let e: __mmask16 = 0b11111111_11111111;
19248        assert_eq!(r, e);
19249    }
19250
19251    #[simd_test(enable = "avx512bw,avx512vl")]
19252    unsafe fn test_mm_test_epi16_mask() {
19253        let a = _mm_set1_epi16(1 << 0);
19254        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
19255        let r = _mm_test_epi16_mask(a, b);
19256        let e: __mmask8 = 0b11111111;
19257        assert_eq!(r, e);
19258    }
19259
19260    #[simd_test(enable = "avx512bw,avx512vl")]
19261    unsafe fn test_mm_mask_test_epi16_mask() {
19262        let a = _mm_set1_epi16(1 << 0);
19263        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
19264        let r = _mm_mask_test_epi16_mask(0, a, b);
19265        assert_eq!(r, 0);
19266        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
19267        let e: __mmask8 = 0b11111111;
19268        assert_eq!(r, e);
19269    }
19270
19271    #[simd_test(enable = "avx512bw")]
19272    unsafe fn test_mm512_test_epi8_mask() {
19273        let a = _mm512_set1_epi8(1 << 0);
19274        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
19275        let r = _mm512_test_epi8_mask(a, b);
19276        let e: __mmask64 =
19277            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
19278        assert_eq!(r, e);
19279    }
19280
19281    #[simd_test(enable = "avx512bw")]
19282    unsafe fn test_mm512_mask_test_epi8_mask() {
19283        let a = _mm512_set1_epi8(1 << 0);
19284        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
19285        let r = _mm512_mask_test_epi8_mask(0, a, b);
19286        assert_eq!(r, 0);
19287        let r = _mm512_mask_test_epi8_mask(
19288            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19289            a,
19290            b,
19291        );
19292        let e: __mmask64 =
19293            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
19294        assert_eq!(r, e);
19295    }
19296
19297    #[simd_test(enable = "avx512bw,avx512vl")]
19298    unsafe fn test_mm256_test_epi8_mask() {
19299        let a = _mm256_set1_epi8(1 << 0);
19300        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
19301        let r = _mm256_test_epi8_mask(a, b);
19302        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19303        assert_eq!(r, e);
19304    }
19305
19306    #[simd_test(enable = "avx512bw,avx512vl")]
19307    unsafe fn test_mm256_mask_test_epi8_mask() {
19308        let a = _mm256_set1_epi8(1 << 0);
19309        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
19310        let r = _mm256_mask_test_epi8_mask(0, a, b);
19311        assert_eq!(r, 0);
19312        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
19313        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19314        assert_eq!(r, e);
19315    }
19316
19317    #[simd_test(enable = "avx512bw,avx512vl")]
19318    unsafe fn test_mm_test_epi8_mask() {
19319        let a = _mm_set1_epi8(1 << 0);
19320        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
19321        let r = _mm_test_epi8_mask(a, b);
19322        let e: __mmask16 = 0b11111111_11111111;
19323        assert_eq!(r, e);
19324    }
19325
19326    #[simd_test(enable = "avx512bw,avx512vl")]
19327    unsafe fn test_mm_mask_test_epi8_mask() {
19328        let a = _mm_set1_epi8(1 << 0);
19329        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
19330        let r = _mm_mask_test_epi8_mask(0, a, b);
19331        assert_eq!(r, 0);
19332        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
19333        let e: __mmask16 = 0b11111111_11111111;
19334        assert_eq!(r, e);
19335    }
19336
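    // testn_epi16_mask is the per-element complement of test_epi16_mask: bit
    // i is set iff (a[i] & b[i]) == 0. The same inputs therefore produce
    // all-zero masks below.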
19337    #[simd_test(enable = "avx512bw")]
19338    unsafe fn test_mm512_testn_epi16_mask() {
19339        let a = _mm512_set1_epi16(1 << 0);
19340        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
19341        let r = _mm512_testn_epi16_mask(a, b);
19342        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
19343        assert_eq!(r, e);
19344    }
19345
19346    #[simd_test(enable = "avx512bw")]
19347    unsafe fn test_mm512_mask_testn_epi16_mask() {
19348        let a = _mm512_set1_epi16(1 << 0);
19349        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
19350        let r = _mm512_mask_testn_epi16_mask(0, a, b);
19351        assert_eq!(r, 0);
19352        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
19353        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
19354        assert_eq!(r, e);
19355    }
19356
19357    #[simd_test(enable = "avx512bw,avx512vl")]
19358    unsafe fn test_mm256_testn_epi16_mask() {
19359        let a = _mm256_set1_epi16(1 << 0);
19360        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
19361        let r = _mm256_testn_epi16_mask(a, b);
19362        let e: __mmask16 = 0b00000000_00000000;
19363        assert_eq!(r, e);
19364    }
19365
19366    #[simd_test(enable = "avx512bw,avx512vl")]
19367    unsafe fn test_mm256_mask_testn_epi16_mask() {
19368        let a = _mm256_set1_epi16(1 << 0);
19369        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
19370        let r = _mm256_mask_testn_epi16_mask(0, a, b);
19371        assert_eq!(r, 0);
19372        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
19373        let e: __mmask16 = 0b00000000_00000000;
19374        assert_eq!(r, e);
19375    }
19376
19377    #[simd_test(enable = "avx512bw,avx512vl")]
19378    unsafe fn test_mm_testn_epi16_mask() {
19379        let a = _mm_set1_epi16(1 << 0);
19380        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
19381        let r = _mm_testn_epi16_mask(a, b);
19382        let e: __mmask8 = 0b00000000;
19383        assert_eq!(r, e);
19384    }
19385
19386    #[simd_test(enable = "avx512bw,avx512vl")]
19387    unsafe fn test_mm_mask_testn_epi16_mask() {
19388        let a = _mm_set1_epi16(1 << 0);
19389        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
19390        let r = _mm_mask_testn_epi16_mask(0, a, b);
19391        assert_eq!(r, 0);
19392        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
19393        let e: __mmask8 = 0b00000000;
19394        assert_eq!(r, e);
19395    }
19396
19397    #[simd_test(enable = "avx512bw")]
19398    unsafe fn test_mm512_testn_epi8_mask() {
19399        let a = _mm512_set1_epi8(1 << 0);
19400        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
19401        let r = _mm512_testn_epi8_mask(a, b);
19402        let e: __mmask64 =
19403            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
19404        assert_eq!(r, e);
19405    }
19406
19407    #[simd_test(enable = "avx512bw")]
19408    unsafe fn test_mm512_mask_testn_epi8_mask() {
19409        let a = _mm512_set1_epi8(1 << 0);
19410        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
19411        let r = _mm512_mask_testn_epi8_mask(0, a, b);
19412        assert_eq!(r, 0);
19413        let r = _mm512_mask_testn_epi8_mask(
19414            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
19415            a,
19416            b,
19417        );
19418        let e: __mmask64 =
19419            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
19420        assert_eq!(r, e);
19421    }
19422
19423    #[simd_test(enable = "avx512bw,avx512vl")]
19424    unsafe fn test_mm256_testn_epi8_mask() {
19425        let a = _mm256_set1_epi8(1 << 0);
19426        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
19427        let r = _mm256_testn_epi8_mask(a, b);
19428        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
19429        assert_eq!(r, e);
19430    }
19431
19432    #[simd_test(enable = "avx512bw,avx512vl")]
19433    unsafe fn test_mm256_mask_testn_epi8_mask() {
19434        let a = _mm256_set1_epi8(1 << 0);
19435        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
19436        let r = _mm256_mask_testn_epi8_mask(0, a, b);
19437        assert_eq!(r, 0);
19438        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
19439        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
19440        assert_eq!(r, e);
19441    }
19442
19443    #[simd_test(enable = "avx512bw,avx512vl")]
19444    unsafe fn test_mm_testn_epi8_mask() {
19445        let a = _mm_set1_epi8(1 << 0);
19446        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
19447        let r = _mm_testn_epi8_mask(a, b);
19448        let e: __mmask16 = 0b00000000_00000000;
19449        assert_eq!(r, e);
19450    }
19451
19452    #[simd_test(enable = "avx512bw,avx512vl")]
19453    unsafe fn test_mm_mask_testn_epi8_mask() {
19454        let a = _mm_set1_epi8(1 << 0);
19455        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
19456        let r = _mm_mask_testn_epi8_mask(0, a, b);
19457        assert_eq!(r, 0);
19458        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
19459        let e: __mmask16 = 0b00000000_00000000;
19460        assert_eq!(r, e);
19461    }
19462
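    // _store_mask*/_load_mask* simply move a k-value through memory, so a
    // store or load round-trip must be the identity.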
19463    #[simd_test(enable = "avx512bw")]
19464    unsafe fn test_store_mask64() {
19465        let a: __mmask64 =
19466            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
19467        let mut r = 0;
19468        _store_mask64(&mut r, a);
19469        assert_eq!(r, a);
19470    }
19471
19472    #[simd_test(enable = "avx512bw")]
19473    unsafe fn test_store_mask32() {
19474        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
19475        let mut r = 0;
19476        _store_mask32(&mut r, a);
19477        assert_eq!(r, a);
19478    }
19479
19480    #[simd_test(enable = "avx512bw")]
19481    unsafe fn test_load_mask64() {
19482        let p: __mmask64 =
19483            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
19484        let r = _load_mask64(&p);
19485        let e: __mmask64 =
19486            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
19487        assert_eq!(r, e);
19488    }
19489
19490    #[simd_test(enable = "avx512bw")]
19491    unsafe fn test_load_mask32() {
19492        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
19493        let r = _load_mask32(&p);
19494        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
19495        assert_eq!(r, e);
19496    }
19497
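    // sad_epu8 (vpsadbw) sums |a[i] - b[i]| over each group of eight
    // unsigned bytes into one 64-bit lane; with a = 2 and b = 4 everywhere,
    // each lane is 8 * |2 - 4| = 16. A scalar sketch of one lane, for
    // reference only (the helper name is illustrative):
    #[allow(dead_code)]
    fn sad_u8x8(a: [u8; 8], b: [u8; 8]) -> u64 {
        // Sum of absolute differences over eight byte pairs.
        a.iter()
            .zip(b.iter())
            .map(|(&x, &y)| u64::from(x.abs_diff(y)))
            .sum()
    }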
19498    #[simd_test(enable = "avx512bw")]
19499    unsafe fn test_mm512_sad_epu8() {
19500        let a = _mm512_set1_epi8(2);
19501        let b = _mm512_set1_epi8(4);
19502        let r = _mm512_sad_epu8(a, b);
19503        let e = _mm512_set1_epi64(16);
19504        assert_eq_m512i(r, e);
19505    }
19506
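    // dbsad_epu8 (vdbpsadbw) first shuffles the dwords of `b` according to
    // the immediate, then produces 16-bit sums of absolute differences over
    // four-byte groups. With both inputs constant the shuffle is a no-op, so
    // every word is 4 * |2 - 4| = 8.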
19507    #[simd_test(enable = "avx512bw")]
19508    unsafe fn test_mm512_dbsad_epu8() {
19509        let a = _mm512_set1_epi8(2);
19510        let b = _mm512_set1_epi8(4);
19511        let r = _mm512_dbsad_epu8::<0>(a, b);
19512        let e = _mm512_set1_epi16(8);
19513        assert_eq_m512i(r, e);
19514    }
19515
19516    #[simd_test(enable = "avx512bw")]
19517    unsafe fn test_mm512_mask_dbsad_epu8() {
19518        let src = _mm512_set1_epi16(1);
19519        let a = _mm512_set1_epi8(2);
19520        let b = _mm512_set1_epi8(4);
19521        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
19522        assert_eq_m512i(r, src);
19523        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
19524        let e = _mm512_set1_epi16(8);
19525        assert_eq_m512i(r, e);
19526    }
19527
19528    #[simd_test(enable = "avx512bw")]
19529    unsafe fn test_mm512_maskz_dbsad_epu8() {
19530        let a = _mm512_set1_epi8(2);
19531        let b = _mm512_set1_epi8(4);
19532        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
19533        assert_eq_m512i(r, _mm512_setzero_si512());
19534        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
19535        let e = _mm512_set1_epi16(8);
19536        assert_eq_m512i(r, e);
19537    }
19538
19539    #[simd_test(enable = "avx512bw,avx512vl")]
19540    unsafe fn test_mm256_dbsad_epu8() {
19541        let a = _mm256_set1_epi8(2);
19542        let b = _mm256_set1_epi8(4);
19543        let r = _mm256_dbsad_epu8::<0>(a, b);
19544        let e = _mm256_set1_epi16(8);
19545        assert_eq_m256i(r, e);
19546    }
19547
19548    #[simd_test(enable = "avx512bw,avx512vl")]
19549    unsafe fn test_mm256_mask_dbsad_epu8() {
19550        let src = _mm256_set1_epi16(1);
19551        let a = _mm256_set1_epi8(2);
19552        let b = _mm256_set1_epi8(4);
19553        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
19554        assert_eq_m256i(r, src);
19555        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
19556        let e = _mm256_set1_epi16(8);
19557        assert_eq_m256i(r, e);
19558    }
19559
19560    #[simd_test(enable = "avx512bw,avx512vl")]
19561    unsafe fn test_mm256_maskz_dbsad_epu8() {
19562        let a = _mm256_set1_epi8(2);
19563        let b = _mm256_set1_epi8(4);
19564        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
19565        assert_eq_m256i(r, _mm256_setzero_si256());
19566        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
19567        let e = _mm256_set1_epi16(8);
19568        assert_eq_m256i(r, e);
19569    }
19570
19571    #[simd_test(enable = "avx512bw,avx512vl")]
19572    unsafe fn test_mm_dbsad_epu8() {
19573        let a = _mm_set1_epi8(2);
19574        let b = _mm_set1_epi8(4);
19575        let r = _mm_dbsad_epu8::<0>(a, b);
19576        let e = _mm_set1_epi16(8);
19577        assert_eq_m128i(r, e);
19578    }
19579
19580    #[simd_test(enable = "avx512bw,avx512vl")]
19581    unsafe fn test_mm_mask_dbsad_epu8() {
19582        let src = _mm_set1_epi16(1);
19583        let a = _mm_set1_epi8(2);
19584        let b = _mm_set1_epi8(4);
19585        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
19586        assert_eq_m128i(r, src);
19587        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
19588        let e = _mm_set1_epi16(8);
19589        assert_eq_m128i(r, e);
19590    }
19591
19592    #[simd_test(enable = "avx512bw,avx512vl")]
19593    unsafe fn test_mm_maskz_dbsad_epu8() {
19594        let a = _mm_set1_epi8(2);
19595        let b = _mm_set1_epi8(4);
19596        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
19597        assert_eq_m128i(r, _mm_setzero_si128());
19598        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
19599        let e = _mm_set1_epi16(8);
19600        assert_eq_m128i(r, e);
19601    }
19602
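    // movepi16_mask/movepi8_mask gather the sign (top) bit of every element
    // into a k-mask, so broadcasting 1 << 15 (or 1 << 7) yields all ones.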
19603    #[simd_test(enable = "avx512bw")]
19604    unsafe fn test_mm512_movepi16_mask() {
19605        let a = _mm512_set1_epi16(1 << 15);
19606        let r = _mm512_movepi16_mask(a);
19607        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19608        assert_eq!(r, e);
19609    }
19610
19611    #[simd_test(enable = "avx512bw,avx512vl")]
19612    unsafe fn test_mm256_movepi16_mask() {
19613        let a = _mm256_set1_epi16(1 << 15);
19614        let r = _mm256_movepi16_mask(a);
19615        let e: __mmask16 = 0b11111111_11111111;
19616        assert_eq!(r, e);
19617    }
19618
19619    #[simd_test(enable = "avx512bw,avx512vl")]
19620    unsafe fn test_mm_movepi16_mask() {
19621        let a = _mm_set1_epi16(1 << 15);
19622        let r = _mm_movepi16_mask(a);
19623        let e: __mmask8 = 0b11111111;
19624        assert_eq!(r, e);
19625    }
19626
19627    #[simd_test(enable = "avx512bw")]
19628    unsafe fn test_mm512_movepi8_mask() {
19629        let a = _mm512_set1_epi8(1 << 7);
19630        let r = _mm512_movepi8_mask(a);
19631        let e: __mmask64 =
19632            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
19633        assert_eq!(r, e);
19634    }
19635
19636    #[simd_test(enable = "avx512bw,avx512vl")]
19637    unsafe fn test_mm256_movepi8_mask() {
19638        let a = _mm256_set1_epi8(1 << 7);
19639        let r = _mm256_movepi8_mask(a);
19640        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19641        assert_eq!(r, e);
19642    }
19643
19644    #[simd_test(enable = "avx512bw,avx512vl")]
19645    unsafe fn test_mm_movepi8_mask() {
19646        let a = _mm_set1_epi8(1 << 7);
19647        let r = _mm_movepi8_mask(a);
19648        let e: __mmask16 = 0b11111111_11111111;
19649        assert_eq!(r, e);
19650    }
19651
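    // movm_epi16/movm_epi8 do the reverse: each mask bit expands to an
    // all-ones or all-zeros element. The `1 << 15 | .. | 1 << 0` chains below
    // just spell out an all-bits-set i16 (-1) without a negative literal.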
19652    #[simd_test(enable = "avx512bw")]
19653    unsafe fn test_mm512_movm_epi16() {
19654        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
19655        let r = _mm512_movm_epi16(a);
19656        let e = _mm512_set1_epi16(
19657            1 << 15
19658                | 1 << 14
19659                | 1 << 13
19660                | 1 << 12
19661                | 1 << 11
19662                | 1 << 10
19663                | 1 << 9
19664                | 1 << 8
19665                | 1 << 7
19666                | 1 << 6
19667                | 1 << 5
19668                | 1 << 4
19669                | 1 << 3
19670                | 1 << 2
19671                | 1 << 1
19672                | 1 << 0,
19673        );
19674        assert_eq_m512i(r, e);
19675    }
19676
19677    #[simd_test(enable = "avx512bw,avx512vl")]
19678    unsafe fn test_mm256_movm_epi16() {
19679        let a: __mmask16 = 0b11111111_11111111;
19680        let r = _mm256_movm_epi16(a);
19681        let e = _mm256_set1_epi16(
19682            1 << 15
19683                | 1 << 14
19684                | 1 << 13
19685                | 1 << 12
19686                | 1 << 11
19687                | 1 << 10
19688                | 1 << 9
19689                | 1 << 8
19690                | 1 << 7
19691                | 1 << 6
19692                | 1 << 5
19693                | 1 << 4
19694                | 1 << 3
19695                | 1 << 2
19696                | 1 << 1
19697                | 1 << 0,
19698        );
19699        assert_eq_m256i(r, e);
19700    }
19701
19702    #[simd_test(enable = "avx512bw,avx512vl")]
19703    unsafe fn test_mm_movm_epi16() {
19704        let a: __mmask8 = 0b11111111;
19705        let r = _mm_movm_epi16(a);
19706        let e = _mm_set1_epi16(
19707            1 << 15
19708                | 1 << 14
19709                | 1 << 13
19710                | 1 << 12
19711                | 1 << 11
19712                | 1 << 10
19713                | 1 << 9
19714                | 1 << 8
19715                | 1 << 7
19716                | 1 << 6
19717                | 1 << 5
19718                | 1 << 4
19719                | 1 << 3
19720                | 1 << 2
19721                | 1 << 1
19722                | 1 << 0,
19723        );
19724        assert_eq_m128i(r, e);
19725    }
19726
19727    #[simd_test(enable = "avx512bw")]
19728    unsafe fn test_mm512_movm_epi8() {
19729        let a: __mmask64 =
19730            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
19731        let r = _mm512_movm_epi8(a);
19732        let e =
19733            _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
19734        assert_eq_m512i(r, e);
19735    }
19736
19737    #[simd_test(enable = "avx512bw,avx512vl")]
19738    unsafe fn test_mm256_movm_epi8() {
19739        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
19740        let r = _mm256_movm_epi8(a);
19741        let e =
19742            _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
19743        assert_eq_m256i(r, e);
19744    }
19745
19746    #[simd_test(enable = "avx512bw,avx512vl")]
19747    unsafe fn test_mm_movm_epi8() {
19748        let a: __mmask16 = 0b11111111_11111111;
19749        let r = _mm_movm_epi8(a);
19750        let e =
19751            _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
19752        assert_eq_m128i(r, e);
19753    }
19754
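    // The _k* helpers below operate on whole k-registers as integers:
    // _cvtmask*/_cvtu* are bit-for-bit conversions, _kadd_* is plain integer
    // addition, and _kand/_knot/_kandn/_kor/_kxor/_kxnor are the
    // corresponding bitwise operations.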
19755    #[simd_test(enable = "avx512bw")]
19756    unsafe fn test_cvtmask32_u32() {
19757        let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
19758        let r = _cvtmask32_u32(a);
19759        let e: u32 = 0b11001100_00110011_01100110_10011001;
19760        assert_eq!(r, e);
19761    }
19762
19763    #[simd_test(enable = "avx512bw")]
19764    unsafe fn test_cvtu32_mask32() {
19765        let a: u32 = 0b11001100_00110011_01100110_10011001;
19766        let r = _cvtu32_mask32(a);
19767        let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
19768        assert_eq!(r, e);
19769    }
19770
19771    #[simd_test(enable = "avx512bw")]
19772    unsafe fn test_kadd_mask32() {
19773        let a: __mmask32 = 11;
19774        let b: __mmask32 = 22;
19775        let r = _kadd_mask32(a, b);
19776        let e: __mmask32 = 33;
19777        assert_eq!(r, e);
19778    }
19779
19780    #[simd_test(enable = "avx512bw")]
19781    unsafe fn test_kadd_mask64() {
19782        let a: __mmask64 = 11;
19783        let b: __mmask64 = 22;
19784        let r = _kadd_mask64(a, b);
19785        let e: __mmask64 = 33;
19786        assert_eq!(r, e);
19787    }
19788
19789    #[simd_test(enable = "avx512bw")]
19790    unsafe fn test_kand_mask32() {
19791        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
19792        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
19793        let r = _kand_mask32(a, b);
19794        let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
19795        assert_eq!(r, e);
19796    }
19797
19798    #[simd_test(enable = "avx512bw")]
19799    unsafe fn test_kand_mask64() {
19800        let a: __mmask64 =
19801            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19802        let b: __mmask64 =
19803            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19804        let r = _kand_mask64(a, b);
19805        let e: __mmask64 =
19806            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19807        assert_eq!(r, e);
19808    }
19809
19810    #[simd_test(enable = "avx512bw")]
19811    unsafe fn test_knot_mask32() {
19812        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
19813        let r = _knot_mask32(a);
19814        let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
19815        assert_eq!(r, e);
19816    }
19817
19818    #[simd_test(enable = "avx512bw")]
19819    unsafe fn test_knot_mask64() {
19820        let a: __mmask64 =
19821            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19822        let r = _knot_mask64(a);
19823        let e: __mmask64 =
19824            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
19825        assert_eq!(r, e);
19826    }
19827
19828    #[simd_test(enable = "avx512bw")]
19829    unsafe fn test_kandn_mask32() {
19830        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
19831        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
19832        let r = _kandn_mask32(a, b);
19833        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
19834        assert_eq!(r, e);
19835    }
19836
19837    #[simd_test(enable = "avx512bw")]
19838    unsafe fn test_kandn_mask64() {
19839        let a: __mmask64 =
19840            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19841        let b: __mmask64 =
19842            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19843        let r = _kandn_mask64(a, b);
19844        let e: __mmask64 =
19845            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
19846        assert_eq!(r, e);
19847    }
19848
19849    #[simd_test(enable = "avx512bw")]
19850    unsafe fn test_kor_mask32() {
19851        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
19852        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
19853        let r = _kor_mask32(a, b);
19854        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19855        assert_eq!(r, e);
19856    }
19857
19858    #[simd_test(enable = "avx512bw")]
19859    unsafe fn test_kor_mask64() {
19860        let a: __mmask64 =
19861            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
19862        let b: __mmask64 =
19863            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19864        let r = _kor_mask64(a, b);
19865        let e: __mmask64 =
19866            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
19867        assert_eq!(r, e);
19868    }
19869
19870    #[simd_test(enable = "avx512bw")]
19871    unsafe fn test_kxor_mask32() {
19872        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
19873        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
19874        let r = _kxor_mask32(a, b);
19875        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
19876        assert_eq!(r, e);
19877    }
19878
19879    #[simd_test(enable = "avx512bw")]
19880    unsafe fn test_kxor_mask64() {
19881        let a: __mmask64 =
19882            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
19883        let b: __mmask64 =
19884            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19885        let r = _kxor_mask64(a, b);
19886        let e: __mmask64 =
19887            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
19888        assert_eq!(r, e);
19889    }
19890
19891    #[simd_test(enable = "avx512bw")]
19892    unsafe fn test_kxnor_mask32() {
19893        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
19894        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
19895        let r = _kxnor_mask32(a, b);
19896        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
19897        assert_eq!(r, e);
19898    }
19899
19900    #[simd_test(enable = "avx512bw")]
19901    unsafe fn test_kxnor_mask64() {
19902        let a: __mmask64 =
19903            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
19904        let b: __mmask64 =
19905            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
19906        let r = _kxnor_mask64(a, b);
19907        let e: __mmask64 =
19908            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
19909        assert_eq!(r, e);
19910    }
19911
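    // kortest computes a | b and reports two flags: the return value is 1
    // iff the OR is all zeros, and the out-parameter is set to 1 iff it is
    // all ones; the *c/*z variants return just the carry or zero flag. In
    // the 32-bit cases below a and b are bitwise complements (OR is all
    // ones); in the 64-bit cases the upper half stays zero.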
19912    #[simd_test(enable = "avx512bw")]
19913    unsafe fn test_kortest_mask32_u8() {
19914        let a: __mmask32 = 0b0110100101101001_0110100101101001;
19915        let b: __mmask32 = 0b1011011010110110_1011011010110110;
19916        let mut all_ones: u8 = 0;
19917        let r = _kortest_mask32_u8(a, b, &mut all_ones);
19918        assert_eq!(r, 0);
19919        assert_eq!(all_ones, 1);
19920    }
19921
19922    #[simd_test(enable = "avx512bw")]
19923    unsafe fn test_kortest_mask64_u8() {
19924        let a: __mmask64 = 0b0110100101101001_0110100101101001;
19925        let b: __mmask64 = 0b1011011010110110_1011011010110110;
19926        let mut all_ones: u8 = 0;
19927        let r = _kortest_mask64_u8(a, b, &mut all_ones);
19928        assert_eq!(r, 0);
19929        assert_eq!(all_ones, 0);
19930    }
19931
19932    #[simd_test(enable = "avx512bw")]
19933    unsafe fn test_kortestc_mask32_u8() {
19934        let a: __mmask32 = 0b0110100101101001_0110100101101001;
19935        let b: __mmask32 = 0b1011011010110110_1011011010110110;
19936        let r = _kortestc_mask32_u8(a, b);
19937        assert_eq!(r, 1);
19938    }
19939
19940    #[simd_test(enable = "avx512bw")]
19941    unsafe fn test_kortestc_mask64_u8() {
19942        let a: __mmask64 = 0b0110100101101001_0110100101101001;
19943        let b: __mmask64 = 0b1011011010110110_1011011010110110;
19944        let r = _kortestc_mask64_u8(a, b);
19945        assert_eq!(r, 0);
19946    }
19947
19948    #[simd_test(enable = "avx512bw")]
19949    unsafe fn test_kortestz_mask32_u8() {
19950        let a: __mmask32 = 0b0110100101101001_0110100101101001;
19951        let b: __mmask32 = 0b1011011010110110_1011011010110110;
19952        let r = _kortestz_mask32_u8(a, b);
19953        assert_eq!(r, 0);
19954    }
19955
19956    #[simd_test(enable = "avx512bw")]
19957    unsafe fn test_kortestz_mask64_u8() {
19958        let a: __mmask64 = 0b0110100101101001_0110100101101001;
19959        let b: __mmask64 = 0b1011011010110110_1011011010110110;
19960        let r = _kortestz_mask64_u8(a, b);
19961        assert_eq!(r, 0);
19962    }
19963
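    // kshiftli/kshiftri shift the whole k-register by the const COUNT,
    // filling with zeros; bits shifted out past the register width are
    // dropped, as in the 32-bit left-shift case below.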
19964    #[simd_test(enable = "avx512bw")]
19965    unsafe fn test_kshiftli_mask32() {
19966        let a: __mmask32 = 0b0110100101101001_0110100101101001;
19967        let r = _kshiftli_mask32::<3>(a);
19968        let e: __mmask32 = 0b0100101101001011_0100101101001000;
19969        assert_eq!(r, e);
19970    }
19971
19972    #[simd_test(enable = "avx512bw")]
19973    unsafe fn test_kshiftli_mask64() {
19974        let a: __mmask64 = 0b0110100101101001_0110100101101001;
19975        let r = _kshiftli_mask64::<3>(a);
19976        let e: __mmask64 = 0b011_0100101101001011_0100101101001000;
19977        assert_eq!(r, e);
19978    }
19979
19980    #[simd_test(enable = "avx512bw")]
19981    unsafe fn test_kshiftri_mask32() {
19982        let a: __mmask32 = 0b0110100101101001_0110100101101001;
19983        let r = _kshiftri_mask32::<3>(a);
19984        let e: __mmask32 = 0b0000110100101101_0010110100101101;
19985        assert_eq!(r, e);
19986    }
19987
19988    #[simd_test(enable = "avx512bw")]
19989    unsafe fn test_kshiftri_mask64() {
19990        let a: __mmask64 = 0b0110100101101001011_0100101101001000;
19991        let r = _kshiftri_mask64::<3>(a);
19992        let e: __mmask64 = 0b0110100101101001_0110100101101001;
19993        assert_eq!(r, e);
19994    }
19995
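    // The KTEST helpers report the flags set by KTEST{D,Q}: the return value
    // of `_ktest*` (and of `_ktestz`) reflects ZF, i.e. whether `a & b` is
    // all zeros, while `and_not` (and `_ktestc`) reflects CF, i.e. whether
    // `!a & b` is all zeros. In these tests `b` complements the populated
    // bits of `a`, so `a & b == 0` while `!a & b != 0`.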
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask32_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask32_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask32_u8() {
        let a: __mmask32 = 0b0110100100111100_0110100100111100;
        let b: __mmask32 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask32_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktest_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let mut and_not: u8 = 0;
        let r = _ktest_mask64_u8(a, b, &mut and_not);
        assert_eq!(r, 1);
        assert_eq!(and_not, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestc_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestc_mask64_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_ktestz_mask64_u8() {
        let a: __mmask64 = 0b0110100100111100_0110100100111100;
        let b: __mmask64 = 0b1001011011000011_1001011011000011;
        let r = _ktestz_mask64_u8(a, b);
        assert_eq!(r, 1);
    }

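    // KUNPCKWD/KUNPCKDQ concatenate mask halves: the low half of the result
    // is taken from `b` and the high half from the low half of `a`.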
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackw() {
        let a: u32 = 0x00110011;
        let b: u32 = 0x00001011;
        let r = _mm512_kunpackw(a, b);
        let e: u32 = 0x00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_kunpackd() {
        let a: u64 = 0x11001100_00110011;
        let b: u64 = 0x00101110_00001011;
        let r = _mm512_kunpackd(a, b);
        let e: u64 = 0x00110011_00001011;
        assert_eq!(r, e);
    }

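    // `cvtepi16_epi8` (VPMOVWB) truncates each 16-bit element to its low 8
    // bits. The `_mm_` variant narrows only 8 elements, so just the low half
    // of its 128-bit result is significant; the upper half is zeroed.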
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

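    // `cvtsepi16_epi8` (VPMOVSWB) narrows with signed saturation, so i16::MAX
    // clamps to i8::MAX.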
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

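    // `cvtusepi16_epi8` (VPMOVUSWB) treats its input as unsigned and narrows
    // with unsigned saturation: i16::MIN reinterprets as 32768, saturates to
    // u8::MAX, and reads back as -1 through the signed epi8 helpers.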
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

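    // `cvtepi8_epi16` (VPMOVSXBW) sign-extends each 8-bit element to 16 bits.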
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

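    // `cvtepu8_epi16` (VPMOVZXBW) zero-extends each 8-bit element to 16 bits.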
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

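    // `bslli_epi128`/`bsrli_epi128` (VPSLLDQ/VPSRLDQ) shift by whole bytes
    // within each 128-bit lane independently, shifting in zeros at every lane
    // boundary.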
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bslli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bsrli_epi128() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }

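    // `alignr_epi8` (VPALIGNR) also works per 128-bit lane: it concatenates
    // the corresponding lanes of `a` and `b`, shifts the 32-byte intermediate
    // right by IMM8 bytes, and keeps the low 16 bytes of each lane.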
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

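    // The `cvt*_storeu_epi8` intrinsics perform the same narrowing
    // conversions as above but write the results directly to memory, storing
    // only the bytes whose mask bit is set. The `_mm_` variants store at most
    // 8 bytes, which is why `r` starts zeroed and its upper half must
    // remain 0.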
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(8);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(8);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
        let a = _mm_set1_epi16(8);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtusepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
}