core/portable-simd/crates/core_simd/src/ops.rs

use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}

/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It performs a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr when `rhs >= <Int>::BITS`.
/// At worst, this will maybe add another instruction and cycle;
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
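
// A hedged sketch, not part of the upstream file: a test showing the wrapping
// behavior this macro buys. The shift amount is bitand-masked to `BITS - 1`,
// so a shift of 33 on u32 lanes behaves like a shift of 1 rather than
// producing poison or panicking.
#[cfg(test)]
mod wrap_bitshift_examples {
    use super::*;

    #[test]
    fn shift_amount_wraps() {
        let ones: Simd<u32, 4> = Simd::splat(1);
        // 33 & (u32::BITS - 1) == 1, so this is `1 << 1`.
        assert_eq!(ones << Simd::splat(33), Simd::splat(1 << 1));
        // A shift of exactly BITS masks to 0, i.e. a no-op.
        assert_eq!(ones << Simd::splat(32), ones);
    }
}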

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing that a well-defined Rust value is returned instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    (   $lhs:ident,
        $rhs:ident,
        {   const PANIC_ZERO: &'static str = $zero:literal;
            $simd_call:ident, $op:tt
        },
        $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };

            // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
            // these operations aren't vectorized on aarch64 anyway
            #[cfg(target_arch = "aarch64")]
            {
                let mut out = Simd::splat(0 as _);
                for i in 0..Self::LEN {
                    out[i] = $lhs[i] $op rhs[i];
                }
                out
            }

            #[cfg(not(target_arch = "aarch64"))]
            {
                // Safety: $lhs and rhs are vectors
                unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
            }
        }
    };
}
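
// A hedged sketch, not part of the upstream file: tests pinning down the two
// guarantees in the table above. A zero divisor in any lane panics like
// scalar division, and `MIN / -1` yields `MIN` (with `MIN % -1` yielding 0)
// instead of UB.
#[cfg(test)]
mod int_divrem_guard_examples {
    use super::*;

    #[test]
    fn min_over_negative_one_is_defined() {
        let lhs: Simd<i32, 4> = Simd::splat(i32::MIN);
        let rhs = Simd::splat(-1);
        assert_eq!(lhs / rhs, Simd::splat(i32::MIN));
        assert_eq!(lhs % rhs, Simd::splat(0));
    }

    #[test]
    #[should_panic(expected = "attempt to divide by zero")]
    fn any_zero_lane_panics() {
        let lhs = Simd::from_array([1, 2, 3, 4]);
        let rhs = Simd::from_array([1, 1, 0, 1]); // a single zero lane is enough
        let _ = lhs / rhs;
    }
}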

macro_rules! for_base_types {
    (   T = ($($scalar:ident),*);
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ty;

        impl $op:ident::$call:ident {
            $macro_impl:ident $inner:tt
        }) => {
            $(
                impl<const N: usize> $op<Self> for Simd<$scalar, N>
                where
                    $scalar: SimdElement,
                    LaneCount<N>: SupportedLaneCount,
                {
                    type Output = $out;

                    #[inline]
                    #[must_use = "operator returns a new vector without mutating the inputs"]
                    // TODO: only useful for int Div::div, but we hope that this
                    // will essentially always get inlined anyway.
                    #[track_caller]
                    fn $call(self, rhs: Self) -> Self::Output {
                        $macro_impl!(self, rhs, $inner, $scalar)
                    }
                }
            )*
    }
}

// A "TokenTree muncher": takes a set of scalar types `T = (...);`,
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr: impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}
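
// For orientation, a hedged sketch (not in the upstream file) of roughly what
// one muncher step generates, here for `Add` on `i32`, with the `#[must_use]`
// and `#[track_caller]` attributes trimmed:
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         fn add(self, rhs: Self) -> Self::Output {
//             // substituted in from `unsafe_base { simd_add }`
//             unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//         }
//     }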

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div, /
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem, %
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
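
// A hedged sketch, not part of the upstream file: a test for the wrapping
// arithmetic noted above. Unlike scalar `+`, which panics on overflow in
// debug builds, the simd_* intrinsics always wrap.
#[cfg(test)]
mod wrapping_arith_examples {
    use super::*;

    #[test]
    fn integer_ops_wrap_instead_of_panicking() {
        let max: Simd<u8, 4> = Simd::splat(u8::MAX);
        assert_eq!(max + Simd::splat(1), Simd::splat(0)); // 255 + 1 wraps to 0
        assert_eq!(Simd::splat(0) - Simd::splat(1), max); // 0 - 1 wraps to 255
    }
}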

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
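
// A hedged sketch, not part of the upstream file: a test showing why floats
// need no guard. Every input is defined, but lanes can become NaN or
// infinity.
#[cfg(test)]
mod float_ops_examples {
    use super::*;

    #[test]
    fn float_division_never_panics() {
        let zeros: Simd<f32, 4> = Simd::splat(0.0);
        let ones = Simd::splat(1.0f32);
        assert!((zeros / zeros).to_array().iter().all(|x| x.is_nan())); // 0.0 / 0.0 is NaN
        assert_eq!(ones / zeros, Simd::splat(f32::INFINITY)); // 1.0 / 0.0 is +inf
    }
}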