From bdc08aeb91626171a7dc5bfbc1e0d109770ee457 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Thu, 6 Jun 2024 12:49:31 -0700 Subject: [PATCH 1/3] Reword the caveats on `array::map` Thanks to 107634 and some improvements in LLVM (particularly `dead_on_unwind`), the method actually optimizes reasonably well now. So focus the discussion on the fundamental ordering differences where the optimizer might never be able to fix it because of the different behaviour, and encouraging `Iterator::map` where an array wasn't actually ever needed. --- library/core/src/array/mod.rs | 55 ++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index 6cca2e6358b63..4a2cf15277441 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -516,20 +516,47 @@ impl<T, const N: usize> [T; N] { /// /// # Note on performance and stack usage /// - /// Unfortunately, usages of this method are currently not always optimized - /// as well as they could be. This mainly concerns large arrays, as mapping - /// over small arrays seem to be optimized just fine. Also note that in - /// debug mode (i.e. without any optimizations), this method can use a lot - /// of stack space (a few times the size of the array or more). - /// - /// Therefore, in performance-critical code, try to avoid using this method - /// on large arrays or check the emitted code. Also try to avoid chained - /// maps (e.g. `arr.map(...).map(...)`). - /// - /// In many cases, you can instead use [`Iterator::map`] by calling `.iter()` - /// or `.into_iter()` on your array. `[T; N]::map` is only necessary if you - /// really need a new array of the same size as the result. Rust's lazy - /// iterators tend to get optimized very well. + /// Note that this method is *eager*. It evaluates `f` all `N` times before + /// returning the new array. 
+ /// + /// That means that `arr.map(f).map(g)` is, in general, *not* equivalent to + /// `arr.map(|x| g(f(x)))`: for a 4-element array, the former calls `f` 4 times then + /// `g` 4 times (`ffffgggg`), whereas the latter interleaves the calls (`fgfgfgfg`). + /// + /// A consequence of this is that it can have fairly-high stack usage, especially + /// in debug mode or for long arrays. The backend may be able to optimize it + /// away, but especially for complicated mappings it might not be able to. + /// + /// If you're doing a one-step `map` and really want an array as the result, + /// then absolutely use this method. Its implementation uses a bunch of tricks + /// to help the optimizer handle it well. Particularly for simple arrays, + /// like `[u8; 3]` or `[f32; 4]`, there's nothing to be concerned about. + /// + /// However, if you don't actually need an *array* of the results specifically, + /// just to process them, then you likely want [`Iterator::map`] instead. + /// + /// For example, rather than doing an array-to-array map of all the elements + /// in the array up-front and only iterating after that completes, + /// + /// ``` + /// # let my_array = [1, 2, 3]; + /// # let f = |x: i32| x + 1; + /// for x in my_array.map(f) { + /// // ... + /// } + /// ``` + /// + /// it's often better to use an iterator along the lines of + /// + /// ``` + /// # let my_array = [1, 2, 3]; + /// # let f = |x: i32| x + 1; + /// for x in my_array.into_iter().map(f) { + /// // ... + /// } + /// ``` + /// + /// as that's more likely to avoid large temporaries. 
/// /// /// # Examples From ef7a7809c743baff98aaa7dcea49e1fafdde190b Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 30 Jan 2026 17:50:52 +0100 Subject: [PATCH 2/3] add test for simd from array repeat codegen --- tests/codegen-llvm/simd/array-repeat.rs | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 tests/codegen-llvm/simd/array-repeat.rs diff --git a/tests/codegen-llvm/simd/array-repeat.rs b/tests/codegen-llvm/simd/array-repeat.rs new file mode 100644 index 0000000000000..691167f866626 --- /dev/null +++ b/tests/codegen-llvm/simd/array-repeat.rs @@ -0,0 +1,40 @@ +//@ add-minicore +//@ revisions: X86 AARCH64 RISCV S390X +//@ [X86] compile-flags: -Copt-level=3 --target=x86_64-unknown-linux-gnu +//@ [X86] needs-llvm-components: x86 +//@ [AARCH64] compile-flags: -Copt-level=3 --target=aarch64-unknown-linux-gnu +//@ [AARCH64] needs-llvm-components: aarch64 +//@ [RISCV] compile-flags: -Copt-level=3 --target riscv64gc-unknown-linux-gnu -Ctarget-feature=+v +//@ [RISCV] needs-llvm-components: riscv +//@ [S390X] compile-flags: -Copt-level=3 --target s390x-unknown-linux-gnu -Ctarget-feature=+vector +//@ [S390X] needs-llvm-components: systemz +#![crate_type = "lib"] +#![feature(repr_simd)] +#![feature(no_core)] +#![no_std] +#![no_core] +extern crate minicore; +use minicore::*; + +#[repr(simd)] +pub struct Simd<T, const N: usize>(pub [T; N]); + +pub type u8x16 = Simd<u8, 16>; + +// Regression test for https://github.com/rust-lang/rust/issues/97804. 
+ +#[unsafe(no_mangle)] +fn foo(v: u16, p: &mut [u8; 16]) { + // An array repeat transmuted into a SIMD type should emit a canonical LLVM splat sequence: + // + // CHECK-LABEL: foo + // CHECK: start + // CHECK-NEXT: %0 = insertelement <8 x i16> poison, i16 %v, i64 0 + // CHECK-NEXT: %1 = shufflevector <8 x i16> %0, <8 x i16> poison, <8 x i32> zeroinitializer + // CHECK-NEXT: store <8 x i16> %1, ptr %p, align 1 + // CHECK-NEXT: ret void + unsafe { + let v: u8x16 = mem::transmute([v; 8]); + *p = mem::transmute(v); + } +} From cdd0ede64037938b5ef89da2b5ac3c77ea7c6375 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Tue, 3 Feb 2026 16:09:21 -0800 Subject: [PATCH 3/3] bootstrap: always propagate `CARGO_TARGET_{host}_LINKER` We were already setting `CARGO_TARGET_{target}_LINKER` when there is a setting in `bootstrap.toml`, and when the host and target are the same, this is also used for build scripts and proc-macros. However, the host value wasn't set when building for any other target, and Cargo would see that as a fingerprint change for those build artifacts, rebuilding them. If we always set the `CARGO_TARGET_{host}_LINKER`, then those build scripts will keep a consistent Cargo fingerprint, so they'll remain cached no matter how we're alternating targets. 
--- src/bootstrap/src/core/builder/cargo.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/bootstrap/src/core/builder/cargo.rs b/src/bootstrap/src/core/builder/cargo.rs index 7150b2b0d59f2..488125863ee0d 100644 --- a/src/bootstrap/src/core/builder/cargo.rs +++ b/src/bootstrap/src/core/builder/cargo.rs @@ -10,7 +10,7 @@ use crate::core::build_steps::tool::SourceType; use crate::core::config::SplitDebuginfo; use crate::core::config::flags::Color; use crate::utils::build_stamp; -use crate::utils::helpers::{self, LldThreads, check_cfg_arg, linker_args, linker_flags}; +use crate::utils::helpers::{self, LldThreads, check_cfg_arg, linker_flags}; use crate::{ BootstrapCommand, CLang, Compiler, Config, DryRun, EXTRA_CHECK_CFGS, GitRepo, Mode, RemapScheme, TargetSelection, command, prepare_behaviour_dump_dir, t, @@ -310,7 +310,15 @@ impl Cargo { } } - for arg in linker_args(builder, compiler.host, LldThreads::Yes) { + // We need to set host linker flags for compiling build scripts and proc-macros. + // This is done the same way as the target linker flags below, so cargo won't see + // any fingerprint difference between host==target versus cross-compiled targets + // when it comes to those host build artifacts. + if let Some(host_linker) = builder.linker(compiler.host) { + let host = crate::envify(&compiler.host.triple); + self.command.env(format!("CARGO_TARGET_{host}_LINKER"), host_linker); + } + for arg in linker_flags(builder, compiler.host, LldThreads::Yes) { self.hostflags.arg(&arg); } @@ -319,11 +327,11 @@ impl Cargo { self.command.env(format!("CARGO_TARGET_{target}_LINKER"), target_linker); } // We want to set -Clinker using Cargo, therefore we only call `linker_flags` and not - // `linker_args` here. + // `linker_args` here. Cargo will pass that to both rustc and rustdoc invocations. 
for flag in linker_flags(builder, target, LldThreads::Yes) { self.rustflags.arg(&flag); } - for arg in linker_args(builder, target, LldThreads::Yes) { + for arg in linker_flags(builder, target, LldThreads::Yes) { self.rustdocflags.arg(&arg); }