1use crate::arch::asm;
2use crate::core_arch::x86::*;
3
4#[cfg(test)]
5use stdarch_test::assert_instr;
6
7#[inline]
13#[target_feature(enable = "avxneconvert")]
14#[cfg_attr(
15 all(test, any(target_os = "linux", target_env = "msvc")),
16 assert_instr(vbcstnebf162ps)
17)]
18#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
19pub unsafe fn _mm_bcstnebf16_ps(a: *const bf16) -> __m128 {
20 bcstnebf162ps_128(a)
21}
22
23#[inline]
29#[target_feature(enable = "avxneconvert")]
30#[cfg_attr(
31 all(test, any(target_os = "linux", target_env = "msvc")),
32 assert_instr(vbcstnebf162ps)
33)]
34#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
35pub unsafe fn _mm256_bcstnebf16_ps(a: *const bf16) -> __m256 {
36 bcstnebf162ps_256(a)
37}
38
39#[inline]
45#[target_feature(enable = "avxneconvert")]
46#[cfg_attr(
47 all(test, any(target_os = "linux", target_env = "msvc")),
48 assert_instr(vbcstnesh2ps)
49)]
50#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
51pub unsafe fn _mm_bcstnesh_ps(a: *const f16) -> __m128 {
52 bcstnesh2ps_128(a)
53}
54
55#[inline]
61#[target_feature(enable = "avxneconvert")]
62#[cfg_attr(
63 all(test, any(target_os = "linux", target_env = "msvc")),
64 assert_instr(vbcstnesh2ps)
65)]
66#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
67pub unsafe fn _mm256_bcstnesh_ps(a: *const f16) -> __m256 {
68 bcstnesh2ps_256(a)
69}
70
71#[inline]
76#[target_feature(enable = "avxneconvert")]
77#[cfg_attr(
78 all(test, any(target_os = "linux", target_env = "msvc")),
79 assert_instr(vcvtneebf162ps)
80)]
81#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
82pub unsafe fn _mm_cvtneebf16_ps(a: *const __m128bh) -> __m128 {
83 transmute(cvtneebf162ps_128(a))
84}
85
86#[inline]
91#[target_feature(enable = "avxneconvert")]
92#[cfg_attr(
93 all(test, any(target_os = "linux", target_env = "msvc")),
94 assert_instr(vcvtneebf162ps)
95)]
96#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
97pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 {
98 transmute(cvtneebf162ps_256(a))
99}
100
101#[inline]
106#[target_feature(enable = "avxneconvert")]
107#[cfg_attr(
108 all(test, any(target_os = "linux", target_env = "msvc")),
109 assert_instr(vcvtneeph2ps)
110)]
111#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
112pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 {
113 transmute(cvtneeph2ps_128(a))
114}
115
116#[inline]
121#[target_feature(enable = "avxneconvert")]
122#[cfg_attr(
123 all(test, any(target_os = "linux", target_env = "msvc")),
124 assert_instr(vcvtneeph2ps)
125)]
126#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
127pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 {
128 transmute(cvtneeph2ps_256(a))
129}
130
131#[inline]
136#[target_feature(enable = "avxneconvert")]
137#[cfg_attr(
138 all(test, any(target_os = "linux", target_env = "msvc")),
139 assert_instr(vcvtneobf162ps)
140)]
141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
142pub unsafe fn _mm_cvtneobf16_ps(a: *const __m128bh) -> __m128 {
143 transmute(cvtneobf162ps_128(a))
144}
145
146#[inline]
151#[target_feature(enable = "avxneconvert")]
152#[cfg_attr(
153 all(test, any(target_os = "linux", target_env = "msvc")),
154 assert_instr(vcvtneobf162ps)
155)]
156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
157pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 {
158 transmute(cvtneobf162ps_256(a))
159}
160
161#[inline]
166#[target_feature(enable = "avxneconvert")]
167#[cfg_attr(
168 all(test, any(target_os = "linux", target_env = "msvc")),
169 assert_instr(vcvtneoph2ps)
170)]
171#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
172pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 {
173 transmute(cvtneoph2ps_128(a))
174}
175
176#[inline]
181#[target_feature(enable = "avxneconvert")]
182#[cfg_attr(
183 all(test, any(target_os = "linux", target_env = "msvc")),
184 assert_instr(vcvtneoph2ps)
185)]
186#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
187pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
188 transmute(cvtneoph2ps_256(a))
189}
190
191#[inline]
196#[target_feature(enable = "avxneconvert")]
197#[cfg_attr(
198 all(test, any(target_os = "linux", target_env = "msvc")),
199 assert_instr(vcvtneps2bf16)
200)]
201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
202pub unsafe fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
203 let mut dst: __m128bh;
204 asm!(
205 "{{vex}}vcvtneps2bf16 {dst},{src}",
206 dst = lateout(xmm_reg) dst,
207 src = in(xmm_reg) a,
208 options(pure, nomem, nostack, preserves_flags)
209 );
210 dst
211}
212
213#[inline]
218#[target_feature(enable = "avxneconvert")]
219#[cfg_attr(
220 all(test, any(target_os = "linux", target_env = "msvc")),
221 assert_instr(vcvtneps2bf16)
222)]
223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
224pub unsafe fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
225 let mut dst: __m128bh;
226 asm!(
227 "{{vex}}vcvtneps2bf16 {dst},{src}",
228 dst = lateout(xmm_reg) dst,
229 src = in(ymm_reg) a,
230 options(pure, nomem, nostack, preserves_flags)
231 );
232 dst
233}
234
235#[allow(improper_ctypes)]
236extern "C" {
237 #[link_name = "llvm.x86.vbcstnebf162ps128"]
238 fn bcstnebf162ps_128(a: *const bf16) -> __m128;
239 #[link_name = "llvm.x86.vbcstnebf162ps256"]
240 fn bcstnebf162ps_256(a: *const bf16) -> __m256;
241 #[link_name = "llvm.x86.vbcstnesh2ps128"]
242 fn bcstnesh2ps_128(a: *const f16) -> __m128;
243 #[link_name = "llvm.x86.vbcstnesh2ps256"]
244 fn bcstnesh2ps_256(a: *const f16) -> __m256;
245
246 #[link_name = "llvm.x86.vcvtneebf162ps128"]
247 fn cvtneebf162ps_128(a: *const __m128bh) -> __m128;
248 #[link_name = "llvm.x86.vcvtneebf162ps256"]
249 fn cvtneebf162ps_256(a: *const __m256bh) -> __m256;
250 #[link_name = "llvm.x86.vcvtneeph2ps128"]
251 fn cvtneeph2ps_128(a: *const __m128h) -> __m128;
252 #[link_name = "llvm.x86.vcvtneeph2ps256"]
253 fn cvtneeph2ps_256(a: *const __m256h) -> __m256;
254
255 #[link_name = "llvm.x86.vcvtneobf162ps128"]
256 fn cvtneobf162ps_128(a: *const __m128bh) -> __m128;
257 #[link_name = "llvm.x86.vcvtneobf162ps256"]
258 fn cvtneobf162ps_256(a: *const __m256bh) -> __m256;
259 #[link_name = "llvm.x86.vcvtneoph2ps128"]
260 fn cvtneoph2ps_128(a: *const __m128h) -> __m128;
261 #[link_name = "llvm.x86.vcvtneoph2ps256"]
262 fn cvtneoph2ps_256(a: *const __m256h) -> __m256;
263}
264
#[cfg(test)]
mod tests {
    use crate::core_arch::simd::{u16x4, u16x8};
    use crate::core_arch::x86::*;
    use crate::mem::transmute_copy;
    use std::ptr::addr_of;
    use stdarch_test::simd_test;

    // BF16 bit patterns (sign | 8 exponent bits | 7 mantissa bits) for 1.0 through 8.0.
    const BF16_ONE: u16 = 0b0_01111111_0000000;
    const BF16_TWO: u16 = 0b0_10000000_0000000;
    const BF16_THREE: u16 = 0b0_10000000_1000000;
    const BF16_FOUR: u16 = 0b0_10000001_0000000;
    const BF16_FIVE: u16 = 0b0_10000001_0100000;
    const BF16_SIX: u16 = 0b0_10000001_1000000;
    const BF16_SEVEN: u16 = 0b0_10000001_1100000;
    const BF16_EIGHT: u16 = 0b0_10000010_0000000;

    // 1.0..=8.0 as BF16, the shared input of the 128-bit even/odd conversion tests.
    const BF16_ONE_TO_EIGHT: [u16; 8] = [
        BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
    ];

    // The same sequence repeated twice, the shared input of the 256-bit tests.
    const BF16_ONE_TO_EIGHT_TWICE: [u16; 16] = [
        BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
    ];

    // A BF16 1.0 must be broadcast as 1.0 into every f32 lane.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnebf16_ps() {
        let scalar = bf16::from_bits(BF16_ONE);
        let got = _mm_bcstnebf16_ps(addr_of!(scalar));
        let want = _mm_set1_ps(1.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnebf16_ps() {
        let scalar = bf16::from_bits(BF16_ONE);
        let got = _mm256_bcstnebf16_ps(addr_of!(scalar));
        let want = _mm256_set1_ps(1.);
        assert_eq_m256(got, want);
    }

    // An f16 1.0 must be broadcast as 1.0 into every f32 lane.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnesh_ps() {
        let scalar = 1.0_f16;
        let got = _mm_bcstnesh_ps(addr_of!(scalar));
        let want = _mm_set1_ps(1.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnesh_ps() {
        let scalar = 1.0_f16;
        let got = _mm256_bcstnesh_ps(addr_of!(scalar));
        let want = _mm256_set1_ps(1.);
        assert_eq_m256(got, want);
    }

    // Even-indexed BF16 elements (0, 2, 4, ...) are the ones converted.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneebf16_ps() {
        let src = __m128bh(BF16_ONE_TO_EIGHT);
        let got = _mm_cvtneebf16_ps(addr_of!(src));
        let want = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneebf16_ps() {
        let src = __m256bh(BF16_ONE_TO_EIGHT_TWICE);
        let got = _mm256_cvtneebf16_ps(addr_of!(src));
        let want = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
        assert_eq_m256(got, want);
    }

    // Even-indexed f16 elements (0, 2, 4, ...) are the ones converted.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneeph_ps() {
        let src = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let got = _mm_cvtneeph_ps(addr_of!(src));
        let want = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneeph_ps() {
        let src = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let got = _mm256_cvtneeph_ps(addr_of!(src));
        let want = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
        assert_eq_m256(got, want);
    }

    // Odd-indexed BF16 elements (1, 3, 5, ...) are the ones converted.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneobf16_ps() {
        let src = __m128bh(BF16_ONE_TO_EIGHT);
        let got = _mm_cvtneobf16_ps(addr_of!(src));
        let want = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneobf16_ps() {
        let src = __m256bh(BF16_ONE_TO_EIGHT_TWICE);
        let got = _mm256_cvtneobf16_ps(addr_of!(src));
        let want = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
        assert_eq_m256(got, want);
    }

    // Odd-indexed f16 elements (1, 3, 5, ...) are the ones converted.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneoph_ps() {
        let src = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let got = _mm_cvtneoph_ps(addr_of!(src));
        let want = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneoph_ps() {
        let src = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let got = _mm256_cvtneoph_ps(addr_of!(src));
        let want = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
        assert_eq_m256(got, want);
    }

    // Only the low four BF16 results are compared, hence the size-changing
    // `transmute_copy` into a four-element vector.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneps_avx_pbh() {
        let src = _mm_setr_ps(1., 2., 3., 4.);
        let got: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(src));
        let want = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
        assert_eq!(got, want);
    }

    // All eight BF16 results are compared.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneps_avx_pbh() {
        let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let got: u16x8 = transmute(_mm256_cvtneps_avx_pbh(src));
        let want = u16x8::new(
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        );
        assert_eq!(got, want);
    }
}