core/stdarch/crates/core_arch/src/x86_64/
avx512fp16.rs

1use crate::core_arch::x86::*;
2#[cfg(test)]
3use stdarch_test::assert_instr;
4
5/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
6/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements
7/// of dst.
8///
9/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sh)
10#[inline]
11#[target_feature(enable = "avx512fp16")]
12#[cfg_attr(test, assert_instr(vcvtsi2sh))]
13#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
14pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
15    vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
16}
17
18/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
19/// result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements
20/// of dst.
21///
22/// Rounding is done according to the rounding parameter, which can be one of:
23///
24/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
25/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
26/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
27/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
28/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
29///
30/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sh)
31#[inline]
32#[target_feature(enable = "avx512fp16")]
33#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
34#[rustc_legacy_const_generics(2)]
35#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
36pub unsafe fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
37    static_assert_rounding!(ROUNDING);
38    vcvtsi642sh(a, b, ROUNDING)
39}
40
41/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
42/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements
43/// of dst.
44///
45/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sh)
46#[inline]
47#[target_feature(enable = "avx512fp16")]
48#[cfg_attr(test, assert_instr(vcvtusi2sh))]
49#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
50pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
51    vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
52}
53
54/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
55/// result in the lower element of dst, and copy the upper 1 packed elements from a to the upper elements
56/// of dst.
57///
58/// Rounding is done according to the rounding parameter, which can be one of:
59///
60/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
61/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
62/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
63/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
64/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
65///
66/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sh)
67#[inline]
68#[target_feature(enable = "avx512fp16")]
69#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
70#[rustc_legacy_const_generics(2)]
71#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
72pub unsafe fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
73    static_assert_rounding!(ROUNDING);
74    vcvtusi642sh(a, b, ROUNDING)
75}
76
77/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
78/// the result in dst.
79///
80/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64)
81#[inline]
82#[target_feature(enable = "avx512fp16")]
83#[cfg_attr(test, assert_instr(vcvtsh2si))]
84#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
85pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 {
86    vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION)
87}
88
89/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
90/// the result in dst.
91///
92/// Rounding is done according to the rounding parameter, which can be one of:
93///
94/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
95/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
96/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
97/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
98/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
99///
100/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64)
101#[inline]
102#[target_feature(enable = "avx512fp16")]
103#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
104#[rustc_legacy_const_generics(1)]
105#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
106pub unsafe fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
107    static_assert_rounding!(ROUNDING);
108    vcvtsh2si64(a, ROUNDING)
109}
110
111/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
112/// the result in dst.
113///
114/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64)
115#[inline]
116#[target_feature(enable = "avx512fp16")]
117#[cfg_attr(test, assert_instr(vcvtsh2usi))]
118#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
119pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 {
120    vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION)
121}
122
123/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
124/// the result in dst.
125///
126/// Rounding is done according to the rounding parameter, which can be one of:
127///
128/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
129/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
130/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
131/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
132/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
133///
134/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64)
135#[inline]
136#[target_feature(enable = "avx512fp16")]
137#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
138#[rustc_legacy_const_generics(1)]
139#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
140pub unsafe fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
141    static_assert_rounding!(ROUNDING);
142    vcvtsh2usi64(a, ROUNDING)
143}
144
145/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
146/// and store the result in dst.
147///
148/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64)
149#[inline]
150#[target_feature(enable = "avx512fp16")]
151#[cfg_attr(test, assert_instr(vcvttsh2si))]
152#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
153pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 {
154    vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION)
155}
156
157/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
158/// and store the result in dst.
159///
160/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
161///
162/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64)
163#[inline]
164#[target_feature(enable = "avx512fp16")]
165#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
166#[rustc_legacy_const_generics(1)]
167#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
168pub unsafe fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
169    static_assert_sae!(SAE);
170    vcvttsh2si64(a, SAE)
171}
172
173/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
174/// and store the result in dst.
175///
176/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
177#[inline]
178#[target_feature(enable = "avx512fp16")]
179#[cfg_attr(test, assert_instr(vcvttsh2usi))]
180#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
181pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 {
182    vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION)
183}
184
185/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
186/// and store the result in dst.
187///
188/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
189///
190/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u64)
191#[inline]
192#[target_feature(enable = "avx512fp16")]
193#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
194#[rustc_legacy_const_generics(1)]
195#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
196pub unsafe fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
197    static_assert_sae!(SAE);
198    vcvttsh2usi64(a, SAE)
199}
200
201#[allow(improper_ctypes)]
202extern "C" {
203    #[link_name = "llvm.x86.avx512fp16.vcvtsi642sh"]
204    fn vcvtsi642sh(a: __m128h, b: i64, rounding: i32) -> __m128h;
205    #[link_name = "llvm.x86.avx512fp16.vcvtusi642sh"]
206    fn vcvtusi642sh(a: __m128h, b: u64, rounding: i32) -> __m128h;
207    #[link_name = "llvm.x86.avx512fp16.vcvtsh2si64"]
208    fn vcvtsh2si64(a: __m128h, rounding: i32) -> i64;
209    #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi64"]
210    fn vcvtsh2usi64(a: __m128h, rounding: i32) -> u64;
211    #[link_name = "llvm.x86.avx512fp16.vcvttsh2si64"]
212    fn vcvttsh2si64(a: __m128h, sae: i32) -> i64;
213    #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi64"]
214    fn vcvttsh2usi64(a: __m128h, sae: i32) -> u64;
215}
216
// Smoke tests for the 64-bit scalar <-> half-precision conversions defined in this file.
// Each test feeds the lanes 1.0..=8.0 and checks either the converted lower lane (with the
// upper seven lanes carried over from `a`) or the integer extracted from the lower lane.
#[cfg(test)]
mod tests {
    use crate::core_arch::{x86::*, x86_64::*};
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvti64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvti64_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundi64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundi64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtu64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtu64_sh(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundu64_sh() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundu64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
        let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        assert_eq_m128h(r, e);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_i64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtsh_u64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvt_roundsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvt_roundsh_u64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_i64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_i64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        // The truncating variant takes an SAE-only parameter, so pass the bare
        // exception-suppression flag, matching `test_mm_cvtt_roundsh_u64`.
        let r = _mm_cvtt_roundsh_i64::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvttsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvttsh_u64(a);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512fp16")]
    unsafe fn test_mm_cvtt_roundsh_u64() {
        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
        let r = _mm_cvtt_roundsh_u64::<_MM_FROUND_NO_EXC>(a);
        assert_eq!(r, 1);
    }
}