core/stdarch/crates/core_arch/src/
simd.rs

//! Internal `#[repr(simd)]` types
2
3#![allow(non_camel_case_types)]
4
/// Defines a crate-internal `#[repr(simd)]` vector type `$name` wrapping
/// `[$lane; $lanes]`, plus its `ZERO` constant and the `new`, `splat` and
/// `extract` helpers.
///
/// Invoked as `simd_ty!(Name[Lane; N]: a, b, ...)`; the identifiers after the
/// colon become the parameter names of `new`, one per lane.
macro_rules! simd_ty {
    ($name:ident [$lane:ty ; $lanes:literal]: $($arg:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $name([$lane; $lanes]);

        #[allow(clippy::use_self)]
        impl $name {
            /// The vector in which every lane is zero.
            // SAFETY: the lane types this macro is instantiated with in this
            // file are integers and floats, for which the all-zero bit
            // pattern is a valid value.
            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

            /// Builds a vector from its lanes, listed in order.
            #[inline(always)]
            pub(crate) const fn new($($arg: $lane),*) -> Self {
                $name([$($arg),*])
            }

            /// Builds a vector in which every lane holds `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: $lane) -> Self {
                // Single-lane helper vector used as the shuffle source.
                #[repr(simd)]
                #[derive(Copy, Clone)]
                struct Single([$lane; 1]);

                let single = Single([value]);
                // SAFETY: every shuffle index is 0, which is always in-bounds
                // because the source is a simd type with exactly one element.
                unsafe { simd_shuffle!(single, single, [0; $lanes]) }
            }

            /// Returns the lane at position `index`.
            ///
            /// `index` is a runtime value rather than a constant, so this is
            /// not efficient; use it for testing only.
            ///
            /// Panics if `index` is not smaller than the number of lanes.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(self, index: usize) -> $lane {
                assert!(index < $lanes);
                // View the vector as a pointer to its first lane.
                let first_lane = (&self as *const Self).cast::<$lane>();
                // SAFETY: the assertion above guarantees `index` is in-bounds
                // for the `$lanes` lanes stored in `self`.
                unsafe { first_lane.add(index).read() }
            }
        }
    }
}
48
/// Defines a crate-internal `#[repr(simd)]` mask type `$name` wrapping
/// `[$lane; $lanes]`, whose constructors take `bool`s and store each one as
/// an all-zeros (`false`) or all-ones (`true`) lane.
///
/// Invoked as `simd_m_ty!(Name[Lane; N]: a, b, ...)`; the identifiers after
/// the colon become the parameter names of `new`, one per lane.
macro_rules! simd_m_ty {
    ($name:ident [$lane:ident ; $lanes:literal]: $($arg:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $name([$lane; $lanes]);

        #[allow(clippy::use_self)]
        impl $name {
            /// Maps `false` to a lane of all zero bits and `true` to a lane of
            /// all one bits.
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $lane {
                if x { !(0 as $lane) } else { 0 as $lane }
            }

            /// Builds a mask from one `bool` per lane, listed in order.
            #[inline(always)]
            pub(crate) const fn new($($arg: bool),*) -> Self {
                $name([$(Self::bool_to_internal($arg)),*])
            }

            /// Builds a mask in which every lane encodes `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: bool) -> Self {
                // Single-lane helper vector used as the shuffle source.
                #[repr(simd)]
                #[derive(Copy, Clone)]
                struct Single([$lane; 1]);

                let single = Single([Self::bool_to_internal(value)]);
                // SAFETY: every shuffle index is 0, which is always in-bounds
                // because the source is a simd type with exactly one element.
                unsafe { simd_shuffle!(single, single, [0; $lanes]) }
            }
        }
    }
}
81
82// 16-bit wide types:
83
84simd_ty!(u8x2[u8;2]: x0, x1);
85simd_ty!(i8x2[i8;2]: x0, x1);
86
87// 32-bit wide types:
88
89simd_ty!(u8x4[u8;4]: x0, x1, x2, x3);
90simd_ty!(u16x2[u16;2]: x0, x1);
91
92simd_ty!(i8x4[i8;4]: x0, x1, x2, x3);
93simd_ty!(i16x2[i16;2]: x0, x1);
94
95// 64-bit wide types:
96
97simd_ty!(
98    u8x8[u8;8]:
99    x0,
100    x1,
101    x2,
102    x3,
103    x4,
104    x5,
105    x6,
106    x7
107);
108simd_ty!(u16x4[u16;4]: x0, x1, x2, x3);
109simd_ty!(u32x2[u32;2]: x0, x1);
110simd_ty!(u64x1[u64;1]: x1);
111
112simd_ty!(
113    i8x8[i8;8]:
114    x0,
115    x1,
116    x2,
117    x3,
118    x4,
119    x5,
120    x6,
121    x7
122);
123simd_ty!(i16x4[i16;4]: x0, x1, x2, x3);
124simd_ty!(i32x2[i32;2]: x0, x1);
125simd_ty!(i64x1[i64;1]: x1);
126
127simd_ty!(f32x2[f32;2]: x0, x1);
128simd_ty!(f64x1[f64;1]: x1);
129
130// 128-bit wide types:
131
132simd_ty!(
133    u8x16[u8;16]:
134    x0,
135    x1,
136    x2,
137    x3,
138    x4,
139    x5,
140    x6,
141    x7,
142    x8,
143    x9,
144    x10,
145    x11,
146    x12,
147    x13,
148    x14,
149    x15
150);
151simd_ty!(
152    u16x8[u16;8]:
153    x0,
154    x1,
155    x2,
156    x3,
157    x4,
158    x5,
159    x6,
160    x7
161);
162simd_ty!(u32x4[u32;4]: x0, x1, x2, x3);
163simd_ty!(u64x2[u64;2]: x0, x1);
164
165simd_ty!(
166    i8x16[i8;16]:
167    x0,
168    x1,
169    x2,
170    x3,
171    x4,
172    x5,
173    x6,
174    x7,
175    x8,
176    x9,
177    x10,
178    x11,
179    x12,
180    x13,
181    x14,
182    x15
183);
184simd_ty!(
185    i16x8[i16;8]:
186    x0,
187    x1,
188    x2,
189    x3,
190    x4,
191    x5,
192    x6,
193    x7
194);
195simd_ty!(i32x4[i32;4]: x0, x1, x2, x3);
196simd_ty!(i64x2[i64;2]: x0, x1);
197
198simd_ty!(
199    f16x8[f16;8]:
200    x0,
201    x1,
202    x2,
203    x3,
204    x4,
205    x5,
206    x6,
207    x7
208);
209simd_ty!(f32x4[f32;4]: x0, x1, x2, x3);
210simd_ty!(f64x2[f64;2]: x0, x1);
211
212simd_m_ty!(
213    m8x16[i8;16]:
214    x0,
215    x1,
216    x2,
217    x3,
218    x4,
219    x5,
220    x6,
221    x7,
222    x8,
223    x9,
224    x10,
225    x11,
226    x12,
227    x13,
228    x14,
229    x15
230);
231simd_m_ty!(
232    m16x8[i16;8]:
233    x0,
234    x1,
235    x2,
236    x3,
237    x4,
238    x5,
239    x6,
240    x7
241);
242simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3);
243simd_m_ty!(m64x2[i64;2]: x0, x1);
244
245// 256-bit wide types:
246
247simd_ty!(
248    u8x32[u8;32]:
249    x0,
250    x1,
251    x2,
252    x3,
253    x4,
254    x5,
255    x6,
256    x7,
257    x8,
258    x9,
259    x10,
260    x11,
261    x12,
262    x13,
263    x14,
264    x15,
265    x16,
266    x17,
267    x18,
268    x19,
269    x20,
270    x21,
271    x22,
272    x23,
273    x24,
274    x25,
275    x26,
276    x27,
277    x28,
278    x29,
279    x30,
280    x31
281);
282simd_ty!(
283    u16x16[u16;16]:
284    x0,
285    x1,
286    x2,
287    x3,
288    x4,
289    x5,
290    x6,
291    x7,
292    x8,
293    x9,
294    x10,
295    x11,
296    x12,
297    x13,
298    x14,
299    x15
300);
301simd_ty!(
302    u32x8[u32;8]:
303    x0,
304    x1,
305    x2,
306    x3,
307    x4,
308    x5,
309    x6,
310    x7
311);
312simd_ty!(u64x4[u64;4]: x0, x1, x2, x3);
313
314simd_ty!(
315    i8x32[i8;32]:
316    x0,
317    x1,
318    x2,
319    x3,
320    x4,
321    x5,
322    x6,
323    x7,
324    x8,
325    x9,
326    x10,
327    x11,
328    x12,
329    x13,
330    x14,
331    x15,
332    x16,
333    x17,
334    x18,
335    x19,
336    x20,
337    x21,
338    x22,
339    x23,
340    x24,
341    x25,
342    x26,
343    x27,
344    x28,
345    x29,
346    x30,
347    x31
348);
349simd_ty!(
350    i16x16[i16;16]:
351    x0,
352    x1,
353    x2,
354    x3,
355    x4,
356    x5,
357    x6,
358    x7,
359    x8,
360    x9,
361    x10,
362    x11,
363    x12,
364    x13,
365    x14,
366    x15
367);
368simd_ty!(
369    i32x8[i32;8]:
370    x0,
371    x1,
372    x2,
373    x3,
374    x4,
375    x5,
376    x6,
377    x7
378);
379simd_ty!(i64x4[i64;4]: x0, x1, x2, x3);
380
381simd_ty!(
382    f16x16[f16;16]:
383    x0,
384    x1,
385    x2,
386    x3,
387    x4,
388    x5,
389    x6,
390    x7,
391    x8,
392    x9,
393    x10,
394    x11,
395    x12,
396    x13,
397    x14,
398    x15
399);
400simd_ty!(
401    f32x8[f32;8]:
402    x0,
403    x1,
404    x2,
405    x3,
406    x4,
407    x5,
408    x6,
409    x7
410);
411simd_ty!(f64x4[f64;4]: x0, x1, x2, x3);
412
413simd_m_ty!(
414    m8x32[i8;32]:
415    x0,
416    x1,
417    x2,
418    x3,
419    x4,
420    x5,
421    x6,
422    x7,
423    x8,
424    x9,
425    x10,
426    x11,
427    x12,
428    x13,
429    x14,
430    x15,
431    x16,
432    x17,
433    x18,
434    x19,
435    x20,
436    x21,
437    x22,
438    x23,
439    x24,
440    x25,
441    x26,
442    x27,
443    x28,
444    x29,
445    x30,
446    x31
447);
448simd_m_ty!(
449    m16x16[i16;16]:
450    x0,
451    x1,
452    x2,
453    x3,
454    x4,
455    x5,
456    x6,
457    x7,
458    x8,
459    x9,
460    x10,
461    x11,
462    x12,
463    x13,
464    x14,
465    x15
466);
467simd_m_ty!(
468    m32x8[i32;8]:
469    x0,
470    x1,
471    x2,
472    x3,
473    x4,
474    x5,
475    x6,
476    x7
477);
478
479// 512-bit wide types:
480
481simd_ty!(
482    i8x64[i8;64]:
483    x0,
484    x1,
485    x2,
486    x3,
487    x4,
488    x5,
489    x6,
490    x7,
491    x8,
492    x9,
493    x10,
494    x11,
495    x12,
496    x13,
497    x14,
498    x15,
499    x16,
500    x17,
501    x18,
502    x19,
503    x20,
504    x21,
505    x22,
506    x23,
507    x24,
508    x25,
509    x26,
510    x27,
511    x28,
512    x29,
513    x30,
514    x31,
515    x32,
516    x33,
517    x34,
518    x35,
519    x36,
520    x37,
521    x38,
522    x39,
523    x40,
524    x41,
525    x42,
526    x43,
527    x44,
528    x45,
529    x46,
530    x47,
531    x48,
532    x49,
533    x50,
534    x51,
535    x52,
536    x53,
537    x54,
538    x55,
539    x56,
540    x57,
541    x58,
542    x59,
543    x60,
544    x61,
545    x62,
546    x63
547);
548
549simd_ty!(
550    u8x64[u8;64]:
551    x0,
552    x1,
553    x2,
554    x3,
555    x4,
556    x5,
557    x6,
558    x7,
559    x8,
560    x9,
561    x10,
562    x11,
563    x12,
564    x13,
565    x14,
566    x15,
567    x16,
568    x17,
569    x18,
570    x19,
571    x20,
572    x21,
573    x22,
574    x23,
575    x24,
576    x25,
577    x26,
578    x27,
579    x28,
580    x29,
581    x30,
582    x31,
583    x32,
584    x33,
585    x34,
586    x35,
587    x36,
588    x37,
589    x38,
590    x39,
591    x40,
592    x41,
593    x42,
594    x43,
595    x44,
596    x45,
597    x46,
598    x47,
599    x48,
600    x49,
601    x50,
602    x51,
603    x52,
604    x53,
605    x54,
606    x55,
607    x56,
608    x57,
609    x58,
610    x59,
611    x60,
612    x61,
613    x62,
614    x63
615);
616
617simd_ty!(
618    i16x32[i16;32]:
619    x0,
620    x1,
621    x2,
622    x3,
623    x4,
624    x5,
625    x6,
626    x7,
627    x8,
628    x9,
629    x10,
630    x11,
631    x12,
632    x13,
633    x14,
634    x15,
635    x16,
636    x17,
637    x18,
638    x19,
639    x20,
640    x21,
641    x22,
642    x23,
643    x24,
644    x25,
645    x26,
646    x27,
647    x28,
648    x29,
649    x30,
650    x31
651);
652
653simd_ty!(
654    u16x32[u16;32]:
655    x0,
656    x1,
657    x2,
658    x3,
659    x4,
660    x5,
661    x6,
662    x7,
663    x8,
664    x9,
665    x10,
666    x11,
667    x12,
668    x13,
669    x14,
670    x15,
671    x16,
672    x17,
673    x18,
674    x19,
675    x20,
676    x21,
677    x22,
678    x23,
679    x24,
680    x25,
681    x26,
682    x27,
683    x28,
684    x29,
685    x30,
686    x31
687);
688
689simd_ty!(
690    i32x16[i32;16]:
691    x0,
692    x1,
693    x2,
694    x3,
695    x4,
696    x5,
697    x6,
698    x7,
699    x8,
700    x9,
701    x10,
702    x11,
703    x12,
704    x13,
705    x14,
706    x15
707);
708
709simd_ty!(
710    u32x16[u32;16]:
711    x0,
712    x1,
713    x2,
714    x3,
715    x4,
716    x5,
717    x6,
718    x7,
719    x8,
720    x9,
721    x10,
722    x11,
723    x12,
724    x13,
725    x14,
726    x15
727);
728
729simd_ty!(
730    f16x32[f16;32]:
731    x0,
732    x1,
733    x2,
734    x3,
735    x4,
736    x5,
737    x6,
738    x7,
739    x8,
740    x9,
741    x10,
742    x11,
743    x12,
744    x13,
745    x14,
746    x15,
747    x16,
748    x17,
749    x18,
750    x19,
751    x20,
752    x21,
753    x22,
754    x23,
755    x24,
756    x25,
757    x26,
758    x27,
759    x28,
760    x29,
761    x30,
762    x31
763);
764simd_ty!(
765    f32x16[f32;16]:
766    x0,
767    x1,
768    x2,
769    x3,
770    x4,
771    x5,
772    x6,
773    x7,
774    x8,
775    x9,
776    x10,
777    x11,
778    x12,
779    x13,
780    x14,
781    x15
782);
783
784simd_ty!(
785    i64x8[i64;8]:
786    x0,
787    x1,
788    x2,
789    x3,
790    x4,
791    x5,
792    x6,
793    x7
794);
795
796simd_ty!(
797    u64x8[u64;8]:
798    x0,
799    x1,
800    x2,
801    x3,
802    x4,
803    x5,
804    x6,
805    x7
806);
807
808simd_ty!(
809    f64x8[f64;8]:
810    x0,
811    x1,
812    x2,
813    x3,
814    x4,
815    x5,
816    x6,
817    x7
818);
819
820// 1024-bit wide types:
821simd_ty!(
822    u16x64[u16;64]:
823    x0,
824    x1,
825    x2,
826    x3,
827    x4,
828    x5,
829    x6,
830    x7,
831    x8,
832    x9,
833    x10,
834    x11,
835    x12,
836    x13,
837    x14,
838    x15,
839    x16,
840    x17,
841    x18,
842    x19,
843    x20,
844    x21,
845    x22,
846    x23,
847    x24,
848    x25,
849    x26,
850    x27,
851    x28,
852    x29,
853    x30,
854    x31,
855    x32,
856    x33,
857    x34,
858    x35,
859    x36,
860    x37,
861    x38,
862    x39,
863    x40,
864    x41,
865    x42,
866    x43,
867    x44,
868    x45,
869    x46,
870    x47,
871    x48,
872    x49,
873    x50,
874    x51,
875    x52,
876    x53,
877    x54,
878    x55,
879    x56,
880    x57,
881    x58,
882    x59,
883    x60,
884    x61,
885    x62,
886    x63
887);
888simd_ty!(
889    i32x32[i32;32]:
890    x0,
891    x1,
892    x2,
893    x3,
894    x4,
895    x5,
896    x6,
897    x7,
898    x8,
899    x9,
900    x10,
901    x11,
902    x12,
903    x13,
904    x14,
905    x15,
906    x16,
907    x17,
908    x18,
909    x19,
910    x20,
911    x21,
912    x22,
913    x23,
914    x24,
915    x25,
916    x26,
917    x27,
918    x28,
919    x29,
920    x30,
921    x31
922);
923simd_ty!(
924    u32x32[u32;32]:
925    x0,
926    x1,
927    x2,
928    x3,
929    x4,
930    x5,
931    x6,
932    x7,
933    x8,
934    x9,
935    x10,
936    x11,
937    x12,
938    x13,
939    x14,
940    x15,
941    x16,
942    x17,
943    x18,
944    x19,
945    x20,
946    x21,
947    x22,
948    x23,
949    x24,
950    x25,
951    x26,
952    x27,
953    x28,
954    x29,
955    x30,
956    x31
957);
958
959/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
960/// were before moving to array-based simd.
961#[inline]
962pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
963    formatter: &mut crate::fmt::Formatter<'_>,
964    type_name: &str,
965    array: [T; N],
966) -> crate::fmt::Result {
967    crate::fmt::Formatter::debug_tuple_fields_finish(
968        formatter,
969        type_name,
970        &crate::array::from_fn::<&dyn crate::fmt::Debug, N, _>(|i| &array[i]),
971    )
972}