From 2b2ad090f63bba9a261d572238af2706b32b8aa9 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 12:42:54 +0100 Subject: [PATCH 01/19] Save the version before running the x86 generator --- Cargo.lock | 15 +- Cargo.toml | 2 + fearless_simd_core/Cargo.toml | 17 + fearless_simd_core/gen/Cargo.toml | 6 + fearless_simd_core/gen/src/data.rs | 2 + fearless_simd_core/gen/src/data/x86.rs | 370 ++++++++++++++++++++ fearless_simd_core/gen/src/main.rs | 166 +++++++++ fearless_simd_core/gen/templates/aarch64.rs | 0 fearless_simd_core/gen/templates/x86.rs | 90 +++++ fearless_simd_core/src/lib.rs | 282 +++++++++++++++ fearless_simd_core/src/trampoline.rs | 231 ++++++++++++ fearless_simd_core/src/x86/mod.rs | 15 + fearless_simd_core/src/x86/v1/fxsr.rs | 80 +++++ fearless_simd_core/src/x86/v1/mod.rs | 38 ++ fearless_simd_core/src/x86/v1/sse.rs | 90 +++++ 15 files changed, 1402 insertions(+), 2 deletions(-) create mode 100644 fearless_simd_core/Cargo.toml create mode 100644 fearless_simd_core/gen/Cargo.toml create mode 100644 fearless_simd_core/gen/src/data.rs create mode 100644 fearless_simd_core/gen/src/data/x86.rs create mode 100644 fearless_simd_core/gen/src/main.rs create mode 100644 fearless_simd_core/gen/templates/aarch64.rs create mode 100644 fearless_simd_core/gen/templates/x86.rs create mode 100644 fearless_simd_core/src/lib.rs create mode 100644 fearless_simd_core/src/trampoline.rs create mode 100644 fearless_simd_core/src/x86/mod.rs create mode 100644 fearless_simd_core/src/x86/v1/fxsr.rs create mode 100644 fearless_simd_core/src/x86/v1/mod.rs create mode 100644 fearless_simd_core/src/x86/v1/sse.rs diff --git a/Cargo.lock b/Cargo.lock index 161950a6f..672a09133 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -60,9 +60,9 @@ checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" [[package]] name = "bytemuck" -version = "1.23.1" +version = "1.24.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" +checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" [[package]] name = "cc" @@ -133,6 +133,17 @@ dependencies = [ "libm", ] +[[package]] +name = "fearless_simd_core" +version = "0.1.0" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "fearless_simd_core_gen" +version = "0.1.0" + [[package]] name = "fearless_simd_dev_macros" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 81395978f..e84d0a2ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,8 @@ resolver = "2" members = [ "fearless_simd", + "fearless_simd_core", + "fearless_simd_core/gen", "fearless_simd_dev_macros", "fearless_simd_gen", "fearless_simd_tests", diff --git a/fearless_simd_core/Cargo.toml b/fearless_simd_core/Cargo.toml new file mode 100644 index 000000000..e16a9823d --- /dev/null +++ b/fearless_simd_core/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "fearless_simd_core" +version = "0.1.0" +edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dev-dependencies] +bytemuck = { version = "1.24.0", features = ["must_cast"] } + +[lints] +workspace = true + +[features] +default = ["std"] +std = [] diff --git a/fearless_simd_core/gen/Cargo.toml b/fearless_simd_core/gen/Cargo.toml new file mode 100644 index 000000000..5617f7be1 --- /dev/null +++ b/fearless_simd_core/gen/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "fearless_simd_core_gen" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs new file mode 100644 index 000000000..87b9ec467 --- /dev/null +++ b/fearless_simd_core/gen/src/data.rs @@ -0,0 +1,2 @@ +mod x86; +pub(crate) use x86::{X86_FEATURES, X86_TEMPLATE}; diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs new file mode 100644 
index 000000000..39e59990a --- /dev/null +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -0,0 +1,370 @@ +use crate::Feature; + +macro_rules! f { + ($(#[doc = $doc_addition: literal])* + struct ::$module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] + fn $example_function_name: ident + $($additional_impls: tt)* + ) => { + Feature { + struct_name: stringify!($struct_name), + feature_name: $feature_name, + directly_implicitly_enabled: &[$($implicitly_enabled),*], + extra_docs: concat!($($doc_addition, "\n",)*), + example_function_name: stringify!($example_function_name), + feature_docs_name: $display_name, + additional_impls: stringify!($($additional_impls)*), + module: stringify!($module) + } + } +} + +pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs"); + +// Data taken from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 +// (specifically, at https://github.com/rust-lang/reference/blob/1d930e1d5a27e114b4d22a50b0b6cd3771b92e31/src/attributes/codegen.md#x86-or-x86_64) +// TODO: Do we need to add their license attribution to our license? 
+// TODO: Check set against https://doc.rust-lang.org/stable/std/macro.is_x86_feature_detected.html +// In particular, we're missing lahfsahf +pub(crate) const X86_FEATURES: &[Feature] = &[ + f!( + /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions + /// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX + struct ::adx::Adx("ADX"): "adx" + [] + fn uses_adx + ), + f!( + /// [AES] --- Advanced Encryption Standard + /// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set + struct ::crypto::Aes("AES"): "aes" + ["sse2"] + fn uses_aes + ), + f!( + /// [AVX] --- Advanced Vector Extensions + /// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions + struct ::avx::Avx("AVX"): "avx" + ["sse4.2"] + fn uses_avx + ), + f!( + /// [AVX2] --- Advanced Vector Extensions 2 + /// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 + struct ::avx::Avx2("AVX2"): "avx2" + ["avx"] + fn uses_avx2 + ), + f!( + /// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions + /// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 + struct ::avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"] + fn uses_avx512bf16 + ), + f!( + /// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms + /// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG + struct ::avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"] + fn uses_avx512bitalg + ), + f!( + /// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions + /// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct ::avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"] + fn uses_avx512bw + ), + f!( + /// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions + /// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection + struct ::avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"] + fn uses_avx512cd + ), + f!( + /// 
[AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions + /// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct ::avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"] + fn uses_avx512dq + ), + f!( + /// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation + /// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 + struct ::avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"] + fn uses_avx512f + ), + f!( + /// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions + /// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 + struct ::avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"] + fn uses_avx512fp16 + ), + f!( + /// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add + /// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA + struct ::avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"] + fn uses_avx512ifma + ), + f!( + /// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions + /// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI + struct ::avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"] + fn uses_avx512vbmi + ), + f!( + /// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 + /// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 + struct ::avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"] + fn uses_avx512vbmi2 + ), + f!( + /// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions + /// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 + struct ::avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"] + fn uses_avx512vl + ), + f!( + /// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions + /// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI + struct ::avx512::Avx512vnni("AVX512-VNNI"): 
"avx512vnni" + ["avx512f"] + fn uses_avx512vnni + ), + f!( + /// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers + /// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT + struct ::avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"] + fn uses_avx512vp2intersect + ), + f!( + /// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction + /// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG + struct ::avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"] + fn uses_avx512vpopcntdq + ), + f!( + /// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add + /// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"] + fn uses_avxifma + ), + f!( + /// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions + /// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"] + fn uses_avxneconvert + ), + f!( + /// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions + /// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"] + fn uses_avxvnni + ), + f!( + /// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers + /// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"] + fn uses_avxvnniint16 + ), + f!( + /// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers + /// 
[AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA + struct ::avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"] + fn uses_avxvnniint8 + ), + f!( + /// [BMI1] --- Bit Manipulation Instruction Sets + /// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets + struct ::v3::Bmi1("BMI1"): "bmi1" + [] + fn uses_bmi1 + ), + f!( + /// [BMI2] --- Bit Manipulation Instruction Sets 2 + /// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 + struct ::v3::Bmi2("BMI2"): "bmi2" + [] + fn uses_bmi2 + ), + f!( + /// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically + /// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + struct ::v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] + fn uses_cmpxchg16b + ), + f!( + /// [F16C] --- 16-bit floating point conversion instructions + /// [F16C]: https://en.wikipedia.org/wiki/F16C + struct ::v3::F16c("F16C"): "f16c" + ["avx"] + fn uses_f16c + ), + f!( + /// [FMA3] --- Three-operand fused multiply-add + /// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set + struct ::v3::Fma("FMA3"): "fma" + ["avx"] + fn uses_fma + ), + f!( + /// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State + /// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave + struct ::sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] + fn uses_fxsr + ), + f!( + /// [GFNI] --- Galois Field New Instructions + /// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI + struct ::crypto::Gfni("GFNI"): "gfni" + ["sse2"] + fn uses_gfni + ), + f!( + /// [KEYLOCKER] --- Intel Key Locker Instructions + /// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions + struct ::crypto::Keylocker("KEYLOCKER"): "kl" + [] + fn uses_keylocker + ), + f!( + /// ["lzcnt"] --- Leading zeros count + /// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt + struct
::v3::Lzcnt("`lzcnt`"): "lzcnt" + [] + fn uses_lzcnt + ), + f!( + /// ["movbe"] --- Move data after swapping bytes + /// ["movbe"]: https://www.felixcloutier.com/x86/movbe + struct ::v3::Movbe("`movbe`"): "movbe" + [] + fn uses_movbe + ), + f!( + /// ["pclmulqdq"] --- Packed carry-less multiplication quadword + /// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq + struct ::crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] + fn uses_pclmulqdq + ), + f!( + /// ["popcnt"] --- Count of bits set to 1 + /// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt + struct ::v2::Popcnt("`popcnt`"): "popcnt" + [] + fn uses_popcnt + ), + f!( + /// ["rdrand"] --- Read random number + /// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand + struct ::crypto::Rdrand("`rdrand`"): "rdrand" + [] + fn uses_rdrand + ), + f!( + /// ["rdseed"] --- Read random seed + /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand + struct ::crypto::Rdseed("`rdseed`"): "rdseed" + [] + fn uses_rdseed + ), + f!( + /// [SHA] --- Secure Hash Algorithm + /// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions + struct ::crypto::Sha("SHA"): "sha" + ["sse2"] + fn uses_sha + ), + f!( + /// [SHA512] --- Secure Hash Algorithm with 512-bit digest + /// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions + struct ::crypto::Sha512("SHA512"): "sha512" + ["avx2"] + fn uses_sha512 + ), + f!( + /// [SM3] --- ShangMi 3 Hash Algorithm + /// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions + struct ::crypto::Sm3("SM3"): "sm3" + ["avx"] + fn uses_sm3 + ), + f!( + /// [SM4] --- ShangMi 4 Cipher Algorithm + /// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions + struct ::crypto::Sm4("SM4"): "sm4" + ["avx2"] + fn uses_sm4 + ), + f!( + /// [SSE] --- Streaming SIMD Extensions + /// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions + struct ::sse::Sse("SSE"): "sse" + [] +
fn uses_sse + ), + f!( + /// [SSE2] --- Streaming SIMD Extensions 2 + /// [SSE2]: https://en.wikipedia.org/wiki/SSE2 + struct ::sse::Sse2("SSE2"): "sse2" + ["sse"] + fn uses_sse2 + ), + f!( + /// [SSE3] --- Streaming SIMD Extensions 3 + /// [SSE3]: https://en.wikipedia.org/wiki/SSE3 + struct ::sse::Sse3("SSE3"): "sse3" + ["sse2"] + fn uses_sse3 + ), + f!( + /// [SSE4.1] --- Streaming SIMD Extensions 4.1 + /// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 + struct ::sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"] + fn uses_sse4_1 + ), + f!( + /// [SSE4.2] --- Streaming SIMD Extensions 4.2 + /// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 + struct ::sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] + fn uses_sse4_2 + ), + // // TODO: This only exists from 1.91 and above (current beta) + // f!( + // /// [SSE4a] --- StreamingSIMDExtensions 4a + // /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a + // struct Sse4a("SSE4a"): "sse4a" + ["sse3"] + // fn uses_sse4a + // ), + f!( + /// [SSSE3] --- Supplemental Streaming SIMD Extensions 3 + /// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 + struct ::sse::SupplementalSse3("SSSE3"): "ssse3" + ["sse3"] + fn uses_ssse3 + ), + f!( + /// [TBM] --- Trailing Bit Manipulation + /// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) + struct ::discontinued::Tbm("TBM"): "tbm" + [] + fn uses_tbm + ), + f!( + /// [VAES] --- Vector AES Instructions + /// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES + struct ::crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"] + fn uses_vaes + ), + f!( + /// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords + /// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ + struct ::crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"] + fn uses_vpclmulqdq + ), + f!( + /// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions + /// [KEYLOCKER_WIDE]:
https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions + struct ::crypto::WideKeylocker("KEYLOCKER_WIDE"): "widekl" + ["kl"] + fn uses_wide_keylocker + ), + f!( + /// ["xsave"] --- Save processor extended states + /// ["xsave"]: https://www.felixcloutier.com/x86/xsave + struct ::xsave::Xsave("`xsave`"): "xsave" + [] + fn uses_xsave + ), + f!( + /// ["xsavec"] --- Save processor extended states with compaction + /// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec + struct ::xsave::Xsavec("`xsavec`"): "xsavec" + [] + fn uses_xsavec + ), + f!( + /// ["xsaveopt"] --- Save processor extended states optimized + /// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt + struct ::xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] + fn uses_xsaveopt + ), + f!( + /// ["xsaves"] --- Save processor extended states supervisor + /// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves + struct ::xsave::Xsaves("`xsaves`"): "xsaves" + [] + fn uses_xsaves + ), +]; + +#[test] +fn all_features_included() {} diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs new file mode 100644 index 000000000..2961bd33c --- /dev/null +++ b/fearless_simd_core/gen/src/main.rs @@ -0,0 +1,166 @@ +mod data; + +use std::fmt::Write; +use std::fs; +use std::{ + cell::RefCell, + collections::HashMap, + fs::create_dir_all, + io, + path::{Path, PathBuf}, +}; + +fn main() { + let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let src_dir = manifest_dir.ancestors().nth(1).unwrap().join("src"); + generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, data::X86_FEATURES).unwrap(); +} + +fn generate_for_arch( + root_dir: &Path, + arch_module_name: &str, + template: &str, + features: &'static [Feature], +) -> io::Result<()> { + let arch_dir = root_dir.join(arch_module_name); + let features = normalize_features(features); + for feature in &features { + let mut new_docs = String::new(); + for line in
feature.feature.extra_docs.lines() { + writeln!(&mut new_docs, "///{line}").unwrap(); + } + let enabled_feature_docs = format!("`{}`", feature.children.join("`, `")); + let enabled_feature_str_list = feature.children.iter().map(|child| format!("\"{child}\"")).collect::<Vec<_>>().join(", "); + let mut from_impls = String::new(); + for child in &feature.children { + let from_feature = features + .iter() + .find(|it| it.feature.feature_name == *child) + .unwrap(); + let type_path = format!( + "crate::{arch_module_name}::{}::{}", + from_feature.feature.module, from_feature.feature.struct_name + ); + write!( + from_impls, // Raw string below: escapes such as `\n` are not processed, so line breaks are written literally. + r#" + impl From<FEATURE_STRUCT_NAME> for {type_path} {{ + fn from(value: FEATURE_STRUCT_NAME) -> Self {{ + trampoline!([FEATURE_STRUCT_NAME = value] => "{{FEATURE_ID}}", fn() -> {type_path} {{ {type_path}::new() }}) + }} + }} + "# + ).unwrap(); + } + let mut result = format!( + "// This file is automatically generated by `fearless_simd_core_gen`.\n\ + // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ + {template}" + ); + // We replace the from impls first, as they use template variables from the rest of this.
+ result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name); + result = result.replace("/// {NEW_DOCS}", &new_docs); + result = result.replace("{FEATURE_ID}", feature.feature.feature_name); + result = result.replace("{ENABLED_FEATURES_DOCS_LIST}", &enabled_feature_docs); + result = result.replace( + "{EXAMPLE_FUNCTION_NAME}", + feature.feature.example_function_name, + ); + result = result.replace("FEATURE_STRUCT_NAME", feature.feature.struct_name); + result = result.replace("{ ENABLED_FEATURES_STR_LIST }", &enabled_feature_str_list).replace("{ENABLED_FEATURES_STR_LIST}", &enabled_feature_str_list); // The template spells this placeholder with inner spaces; accept both spellings. + let module_dir = arch_dir.join(feature.feature.module); + create_dir_all(&module_dir)?; + let file_name = format!("{}.rs", feature.feature.feature_name.replace('.', "_")); // `set_extension` would treat the `.1` of `sse4.1` as an extension, so `sse4.1` and `sse4.2` would both become `sse4.rs`. + let file = module_dir.join(file_name); + fs::write(file, result)?; + } + Ok(()) +} + +#[derive(Debug)] +struct Feature { + /// The name of the struct to be generated. + struct_name: &'static str, + /// The Rust name for the feature, e.g. `"sse"`. + feature_name: &'static str, + /// The array of features which are implicitly enabled by this feature. + /// Note that this array does not include transitive enabled features. + directly_implicitly_enabled: &'static [&'static str], + /// Any additional docs which we want to add to the module. + extra_docs: &'static str, + /// The name of the function used in the examples. + /// Ideally, we'd make this optional, but that starts making the templating look more complicated. + example_function_name: &'static str, + /// The "display name" for the feature, used inside the docs. + feature_docs_name: &'static str, + /// Extra code added at the end. + /// Used for implicitly enabled features. + additional_impls: &'static str, + /// The module (if any) this feature will belong to. + /// + /// (Note that imports into the module are checked to exist, but not automatically inserted). + module: &'static str, +} + +/// Implementation detail intermediate struct of `normalize_features`.
+struct MaybeNormalizedFeature { + /// The actual feature. + feature: &'static Feature, + /// The fully deduplicated, sorted list of target features enabled by this feature, with all + /// implicitly enabled features resolved. + /// + /// Note that this *excludes* the parent target feature. + // We use a RefCell here as we know there cannot be loops. + children: RefCell<Option<Vec<&'static str>>>, +} + +#[derive(Debug)] +struct NormalizedFeature { + feature: &'static Feature, + children: Vec<&'static str>, +} + +fn normalize_features(features: &'static [Feature]) -> Vec<NormalizedFeature> { + let mut state = HashMap::new(); + for feature in features { + state.insert( + feature.feature_name, + MaybeNormalizedFeature { + feature, + children: RefCell::new(None), + }, + ); + } + fn handle_item(state: &HashMap<&str, MaybeNormalizedFeature>, item: &MaybeNormalizedFeature) { + // We borrow for the entire lifetime to avoid infinite loops. + let mut borrowed_children = item.children.borrow_mut(); + if borrowed_children.is_some() { + return; + } + let mut new_children = Vec::new(); + for child in item.feature.directly_implicitly_enabled { + new_children.push(*child); + let child = state + .get(child) + .expect("Every implicitly enabled feature should exist."); + handle_item(state, child); + new_children.extend_from_slice(child.children.borrow().as_ref().unwrap()); + } + new_children.sort(); + new_children.dedup(); + *borrowed_children = Some(new_children); + } + for feature in state.values() { + handle_item(&state, feature); + } + let mut output = Vec::new(); + for (_, feature) in state { + output.push(NormalizedFeature { + feature: feature.feature, + children: feature.children.into_inner().unwrap(), + }); + } + output.sort_by_key(|it| it.feature.feature_name); + output +} diff --git a/fearless_simd_core/gen/templates/aarch64.rs b/fearless_simd_core/gen/templates/aarch64.rs new file mode 100644 index 000000000..e69de29bb diff --git a/fearless_simd_core/gen/templates/x86.rs
b/fearless_simd_core/gen/templates/x86.rs new file mode 100644 index 000000000..a37207a37 --- /dev/null +++ b/fearless_simd_core/gen/templates/x86.rs @@ -0,0 +1,90 @@ +//! The {FEATURE_DOCS_NAME} target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// {NEW_DOCS} +/// +/// A token indicating that the current CPU has the `{FEATURE_ID}` target feature. +/// +/// This feature also enables {ENABLED_FEATURES_DOCS_LIST}; +/// the tokens for these features can be created using [`From`] implementations. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "{FEATURE_ID}")] +/// fn {EXAMPLE_FUNCTION_NAME}() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct FEATURE_STRUCT_NAME { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for FEATURE_STRUCT_NAME { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""{FEATURE_ID}" enabled."#) + } +} + +unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME { + const FEATURES: &[&str] = &["{FEATURE_ID}", {ENABLED_FEATURES_STR_LIST}]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + trampoline!([Self = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl FEATURE_STRUCT_NAME { + #[cfg(feature = "std")] + /// Create a new token if the `"{FEATURE_ID}"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected?
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("{FEATURE_ID}") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "{FEATURE_ID}")] + /// Create a new token for the "{FEATURE_ID}" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// {FEATURE_ID} is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "{FEATURE_DOCS_NAME}" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} +/*{FROM_IMPLS}*/ + +const _: () = { + assert!( + core::mem::size_of::<FEATURE_STRUCT_NAME>() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs new file mode 100644 index 000000000..e212b2406 --- /dev/null +++ b/fearless_simd_core/src/lib.rs @@ -0,0 +1,282 @@ +//! Tooling for Rust's target features. + +// LINEBENDER LINT SET - lib.rs - v4 +// See https://linebender.org/wiki/canonical-lints/ +// These lints shouldn't apply to examples or tests. +#![cfg_attr(not(test), warn(unused_crate_dependencies))] +// These lints shouldn't apply to examples. +#![warn(clippy::print_stdout, clippy::print_stderr)] +// Targeting e.g. 32-bit means structs containing usize can give false positives for 64-bit. +#![cfg_attr(target_pointer_width = "64", warn(clippy::trivially_copy_pass_by_ref))] +// END LINEBENDER LINT SET +#![cfg_attr(docsrs, feature(doc_cfg))] +#![no_std] + +// TODO: Do we want both an `x86` and `x86_64` module?
+#[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] +pub mod x86; + +pub mod trampoline; + +#[cfg(feature = "std")] +extern crate std; + +/// A token proving that a set of target features is available. +/// +/// Note that this trait is only meaningful when there are values of this type. +/// That is, to enable the target features in `FEATURES`, you *must* have a value +/// of this type. +/// +/// Values which implement this trait are used in the second argument to [`trampoline!`], +/// which is a safe abstraction over enabling target features. +/// +/// # Safety +/// +/// To construct a value of a type implementing this trait, you must have proven that each +/// target feature in `FEATURES` is available. +pub unsafe trait TargetFeatureToken: Copy { + /// The set of target features which are enabled for this run, if + /// you have a value of this type. + const FEATURES: &[&str]; + + /// Enable the target features in `FEATURES` for a single run of `f`, and run it. + /// + /// `f` must be marked `#[inline(always)]` for this to work. + /// + /// Note that this does *not* enable the target features on the Rust side (e.g. for calling). + /// To do so, you should instead use [`trampoline!`] directly - this is a convenience wrapper around `trampoline` + /// for cases where the dispatch of simd values is handled elsewhere. + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R; +} + +/// Run an operation in a context with specific target features enabled, validated with [`TargetFeatureToken`] values. +/// +/// This is effectively a stable implementation of the "Struct Target Features" Rust feature, +/// which at the time of writing is in neither stable nor nightly Rust. +/// This macro can be used both to make SIMD dispatch safe and to write explicit SIMD code safely.
+/// +/// # Reference +/// +/// These reference examples presume that you have (values in brackets are the "variables"): +/// +/// - An expression (`token`) of a type (`Token`) which is `TargetFeatureToken` for some target features (`"f1,f2,f3"`); +/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset of those target features (`"f1,f2"`); +/// - Local values of types corresponding to the argument types (`a` of type `[f32; 4]`) +/// +/// ```rust,ignore +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +/// Multiple tokens are also supported by providing them in a sequence in square brackets: +/// +/// ```rust,ignore +/// trampoline!([Token = token, Sse = my_sse] => "f1,f2,sse", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +/// A more advanced syntax is available if you need to use generics. +/// That syntax is explained in comments around the macro's definition, which can be seen above. +/// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is: +/// +/// ```rust,ignore +/// trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) +/// ``` +/// +/// There is also support for where clauses after the return type. +/// +/// # Motivation +/// +/// In Fearless SIMD, this macro has two primary use cases: +/// +/// 1) To dispatch to a specialised SIMD implementation of a function using target specific +/// instructions which will be more efficient than generic version written using the portable subset. +/// 2) To implement the portable subset of SIMD operations. +/// +/// To expand on use case 1, when using Fearless SIMD you will often be writing functions which are +/// instantiated for multiple different SIMD levels (using generics). 
+/// However, for certain SIMD levels, there may be specific instructions which solve your problem more +/// efficiently than using the generic implementations (as an example, consider SHA256 hashing, which has +/// built-in instructions on several architectures). +/// However, in such generic implementations, the Rust type system doesn't know which target features are enabled, +/// so it would ordinarily require writing code to: +/// +/// - detect whether a specific target feature is supported. +/// - unsafely, enter a context where the target feature is enabled in a way which makes the type system aware of this. +/// +/// This macro provides a way to do the second safely once you have completed the first. +/// +/// # Example +/// +/// This expands upon the example in the reference, written out completely. +/// +/// ```rust,ignore +/// // Just once, acquire a token. +/// let token = Token::try_new(); +/// // Later, dispatch based on whether that token is available, potentially multiple times: +/// +/// /// Perform some computation using SIMD. +/// #[target_feature(enable = "f1,f2")] +/// fn uses_simd(val: [f32; 4]) -> [f32; 4] { +/// // ... +/// } +/// +/// let a = [1., 2., 3., 4.]; +/// let Some(token) = token else { return scalar_fallback(a) }; +/// +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// ``` +/// +/// Note that a function only operating on 128 bits is probably too small for checking +/// whether a token exists just for it to be worthwhile. +/// However, if you have amortised the cost of that check between many function calls, +/// the `trampoline!` macro itself compiles down to a function call. +/// (This would be the case when this macro is being used to implement the portable subset of SIMD operations) +/// +// TODO: We could write an example for each of ARM, x86, and conditionally compile it in?
+/// Note that our examples are all ignored as there is no target feature which is available on every platform, +/// but we need these docs to compile for users on any platform. +/// +/// # Soundness +/// +/// This macro is designed to be sound, i.e. no input to this macro can lead to undefined behaviour +/// without using the `unsafe` keyword. +/// +/// The operation provided will only ever be immediately called once on the same thread as the macro caller, +/// so safety justifications within the operation can rely on the context of the call site of this macro. +/// The shorthand format does not allow calling unsafe functions. +#[macro_export] +macro_rules! trampoline { + // [Sse = sse] for "sse", <(u32)> fn<(T: Int)>(a: [T; 4]) -> T where (...) {...} + ( + // The token types, with an expression to get a value of that token kind. + [$($token_type: path = $token: expr),+$(,)?] + // The target feature to enable. Must be a string literal. + => $to_enable: literal, + // The generic arguments to instantiate the call to the generated function with. + // Note the inner brackets, needed because we can't write a parser for this in macros. + $(<($($generic_instantiation: tt)+)>)? + // The generic parameters to give the inner generated function. + // Brackets needed as above. + fn$(<($($generic_args: tt)*)>)? + // The arguments to the function, with provided explicit values, plus return type and where clause. + ($($arg_name: ident: $arg_type: ty = $arg_value: expr),*$(,)?) $(-> $ret: ty)? + // The where clause of the generated function. + // Note the inner brackets after `where`, needed as above. + $(where ($($where: tt)*))? + // The operation to run inside the context with the target feature enabled. + $op: block + ) => {{ + #[target_feature(enable = $to_enable)] + #[inline] + // TODO: Do we want any other attributes here? + // Soundness: We wrap the $op in a wrapping block, to ensure that any inner attributes don't apply to the function. 
+ // This ensures that the user can't add `#![target_feature(enable = "xxx")]` to their block. + // Soundness: Either of generic_args and `$where` could be used to exit the function item early, so aren't + // inside an unsafe block. + fn trampoline_impl$(<$($generic_args)*>)?($($arg_name: $arg_type),*) $(-> $ret)? $(where $($where)*)? { $op } + + $( + // We validate that we actually have a token of each claimed type. + let _: $token_type = $token; + )+ + const { + // And that the claimed types justify enabling the enabled target features. + $crate::trampoline::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) + // TODO: Better failure message here (i.e. at least concatting the set of requested features) + .unwrap(); + } + + $( + // Soundness: We use `arg_value` outside of the macro body to ensure it doesn't + // accidentally gain an unsafe capability. + #[allow(clippy::redundant_locals, reason="Required for consistency/safety.")] + let $arg_name = $arg_value; + )* + // Safety: We have validated that the target features enabled in `trampoline_impl` are enabled, + // because we have values of token types which implement $crate::TargetFeatureToken + // Soundness: `$generic_args` could be used to exit the path expression early. As `<>` are + // not treated as "real" brackets by macros, this isn't practical to detect and avoid statically. + // To try and ensure that this can't turn into unsoundess, the + // `trampoline_impl::<$generic_instantiation>` is evaluated outside of an unsafe block. + // In theory, if a user could make the value of `func` be an `unsafe` fn pointer or + // item type, this would still be unsound. + // However, we haven't found a way for this to compile given the trailing `>`, + // so aren't aware of any actual unsoundess. But note that this hasn't been rigorously proven, + // and new Rust features could open this up wider. 
+ let func = trampoline_impl$(::<$($generic_instantiation)*>)?; + unsafe { func($($arg_name),*) } + }}; + // Sse = sse => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4] + ($token_type: path = $token: expr => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => { + $crate::trampoline!( + [$token_type = $token] + => $to_enable, + $function($($arg_name: $arg_type),*) $(-> $ret)? + ) + }; + // [Sse = sse] => "sse", sse_do_x(a: [f32; 4], b: [f32; 4]) -> [f32; 4] + ([$($token_type: path = $token: expr),+$(,)?] => $to_enable: literal, $function: ident($($arg_name: ident: $arg_type: ty),*$(,)?) $(-> $ret: ty)?) => { + $crate::trampoline!( + [$($token_type = $token),+] + => $to_enable, + fn($($arg_name: $arg_type = $arg_name),*) $(-> $ret)? { $function($($arg_name),*) } + ) + }; +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(test)] +mod example_expansion { + use core::arch::x86_64::{__m128, _mm_mul_ps}; + + use crate::x86::{self, v1::Sse}; + + #[target_feature(enable = "sse")] + fn sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { + let a: __m128 = bytemuck::must_cast(a); + let b: __m128 = bytemuck::must_cast(b); + bytemuck::must_cast(_mm_mul_ps(a, b)) + } + + #[test] + // This is a test so that it is runnable + fn example_output() { + let Some(sse) = x86::v1::Sse::try_new() else { + panic!("Example code") + }; + let a = [10_f32, 20_f32, 30_f32, 40_f32]; + let b = [4_f32, 5_f32, 6_f32, 7_f32]; + + // Both of these example expansions, the former using the shorthand form: + let res = + trampoline!(Sse = sse => "sse", sse_mul_f32s(a: [f32; 4], b: [f32; 4]) -> [f32; 4]); + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + let res = trampoline!([Sse = sse] => "sse", fn(a: [f32; 4] = a, b: [f32; 4] = b) -> [f32; 4] { sse_mul_f32s(a, b)}); + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + // will expand to: + #[expect(unused_braces, reason = "Required for macro soundness.")] + // Start 
expansion: + let res = { + #[target_feature(enable = "sse")] + #[inline] + fn trampoline_impl(a: [f32; 4], b: [f32; 4]) -> [f32; 4] { + { sse_mul_f32s(a, b) } + } + let _: Sse = sse; + const { + crate::trampoline::is_feature_subset( + "sse", + [::FEATURES], + ) + .unwrap(); + } + #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] + let a = a; + #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] + let b = b; + let func = trampoline_impl; + unsafe { func(a, b) } + }; + // End expansion + assert_eq!(res, [40_f32, 100_f32, 180_f32, 280_f32]); + } +} diff --git a/fearless_simd_core/src/trampoline.rs b/fearless_simd_core/src/trampoline.rs new file mode 100644 index 000000000..14c73f62d --- /dev/null +++ b/fearless_simd_core/src/trampoline.rs @@ -0,0 +1,231 @@ +//! Support for the safety checks in [`trampoline!`](crate::trampoline!). +//! +//! Methods to compute whether a each feature in a target feature string (e.g. "sse2,fma") +//! is supported by a set of target features. +//! +//! The [`trampoline`](crate::trampoline!) macro takes both a target feature string, +//! and one (or more) [`TargetFeatureToken`](crate::TargetFeatureToken). +//! It uses the functions in this module to validate that the target feature string is +//! supported by the provided tokens. +//! +//! Because evaluating whether this is safe needs to happen at compile time (for both performance +//! and predictability), the methods in this file are written as `const` functions. +//! This leads to a bit of weirdness, including treating strings as `&[u8]` internally, as that +//! actually allows slicing (i.e. reading individual bytes). As far as I know, that isn't +//! currently possibly in const contexts for strings. +//! Note that the code is still written to be UTF-8 compatible, although we believe that +//! all currently supported target features are ASCII anyway. + +/// The result of `is_feature_subset`. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[must_use] +pub enum SubsetResult { + /// The required features are a subset of the permitted features. + Yes, + /// The required features are not all available. + No { + /// The feature which was found to be missing (there may be several such features). + failing: &'static str, + }, +} + +impl SubsetResult { + /// A utility method to panic if the target features aren't supported. + // TODO: How much more context would we be able to give if we inlined this? + pub const fn unwrap(self) { + match self { + Self::Yes => (), + // This is const, so we can't actually format out the failing value :( + Self::No { .. } => panic!("Tokens provided are missing a necessary target feature."), + } + } +} + +/// Determine whether the features in the target feature string `required` are a subset of the features in `permitted`. +/// See the module level docs [self]. +/// +/// We require static lifetimes as this is primarily internal to the macro. +pub const fn is_feature_subset( + required: &'static str, + permitted: [&[&'static str]; N], +) -> SubsetResult { + let mut required_bytes = required.as_bytes(); + let mut finished = false; + 'input_feature: while !finished { + let mut comma_idx = 0; + // Find the first comma in required_bytes, or the end of the string. + while comma_idx < required_bytes.len() && required_bytes[comma_idx] != b',' { + comma_idx += 1; + } + // `comma_idx` is now the index of the comma, e.g. if the string was "sse,", idx would be 3 + // This is the feature we need to validate exists in permitted. + let (to_find, remaining_required) = &required_bytes.split_at(comma_idx); + if let [comma, rest @ ..] = remaining_required { + if *comma != b',' { + panic!("Internal failure of expected behaviour."); + } else { + required_bytes = rest; + } + } else { + // Exit out of the loop after this iteration. + // Note that for input of `""`` and "sse,", we still need to search + // for the input target feature `` (i.e. 
the empty string), to match Rust's behaviour here. + finished = true; + } + + let mut local_permitted = permitted.as_slice(); + while let [to_test, rest @ ..] = local_permitted { + local_permitted = rest; + if str_array_contains(to_test, to_find) { + continue 'input_feature; + } + } + // We tried all of the items, and `to_find` wasn't one of them. + // Therefore, at least one of the features in the requested features wasn't supported + return SubsetResult::No { + failing: match core::str::from_utf8(to_find) { + Ok(x) => x, + Err(_) => panic!( + "We either found a comma or the end of the string, so before then should have been valid UTF-8." + ), + }, + }; + } + // We found all of the required features. + SubsetResult::Yes +} + +const fn str_array_contains(mut haystack: &[&str], needle: &[u8]) -> bool { + while let [to_test, rest @ ..] = haystack { + haystack = rest; + if byte_arrays_eq(to_test.as_bytes(), needle) { + return true; + } + } + false +} + +const fn byte_arrays_eq(lhs: &[u8], rhs: &[u8]) -> bool { + if lhs.len() != rhs.len() { + return false; + } + let mut idx = 0; + while idx < lhs.len() { + if lhs[idx] != rhs[idx] { + return false; + } + idx += 1; + } + true +} + +#[cfg(test)] +mod tests { + use super::{SubsetResult, is_feature_subset}; + + /// Test if each feature in the feature string `required` is an element in `permitted`. + /// + /// Should be equivalent to [`is_feature_subset`], but not written to be const compatible. + fn is_feature_subset_simple( + required: &'static str, + permitted: [&[&'static str]; N], + ) -> SubsetResult { + 'feature: for feature in required.split(',') { + for permitted_group in &permitted { + for permitted_feature in *permitted_group { + if feature == *permitted_feature { + continue 'feature; + } + } + } + // We tried all permitted feature, and this item wasn't present. + return SubsetResult::No { failing: feature }; + } + SubsetResult::Yes + } + + /// Expect `is_feature_subset` to succeed. 
+ #[track_caller] + fn expect_success(required: &'static str, permitted: [&[&'static str]; N]) { + let res1 = is_feature_subset(required, permitted); + assert_eq!(res1, SubsetResult::Yes, "Const version failed."); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert_eq!(res2, SubsetResult::Yes, "Simpler version failed."); + } + + /// Expect `is_feature_subset` to fail (with only a single possible failure). + #[track_caller] + fn expect_failure( + required: &'static str, + permitted: [&[&'static str]; N], + failing: &'static str, + ) { + let res1 = is_feature_subset(required, permitted); + assert_eq!(res1, SubsetResult::No { failing }, "Const version failed."); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert_eq!( + res2, + SubsetResult::No { failing }, + "Simpler version failed." + ); + } + + /// Expect `is_feature_subset` to fail, possibly with multiple potential missing features. + #[track_caller] + fn expect_any_failure(required: &'static str, permitted: [&[&'static str]; N]) { + let res1 = is_feature_subset(required, permitted); + assert!( + matches!(res1, SubsetResult::No { .. }), + "Const version failed." + ); + // Sanity check against the "trivially correct" version. + let res2 = is_feature_subset_simple(required, permitted); + assert!( + matches!(res2, SubsetResult::No { .. }), + "Simpler version failed." 
+ ); + } + + #[test] + fn simple_cases() { + expect_success("a,b,c", [&["a", "b", "c"]]); + expect_failure("a,b,c", [&["a", "b"]], "c"); + expect_success("c,a,b", [&["a", "b", "c"]]); + expect_failure("c,a,b", [&["a", "b"]], "c"); + expect_success("a,b", [&["a", "b", "c"]]); + expect_failure("a,b", [&["a", "c"]], "b"); + + // Check it correctly catches more than single item failures + expect_success("a1,a2,a3", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2,a3", [&["a1", "a2"]], "a3"); + expect_success("a3,a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a3,a1,a2", [&["a1", "a2"]], "a3"); + expect_success("a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2", [&["a1", "a3"]], "a2"); + + // Check it doesn't have false positives with prefixes + expect_failure("a1,a2,a3", [&["a1", "a2", "a"]], "a3"); + expect_any_failure("a3,a1,a2", [&["a"]]); + expect_success("a1,a2", [&["a1", "a2", "a3"]]); + expect_failure("a1,a2", [&["a1", "a3"]], "a2"); + + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); + } + + #[test] + fn empty_feature() { + expect_failure("a,b,", [&["a", "b"]], ""); + expect_failure("", [&["a", "b"]], ""); + } + + #[test] + fn non_ascii_features() { + expect_success("café", [&["café"]]); + expect_failure("café", [&["cafe"]], "café"); + } +} diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs new file mode 100644 index 000000000..10caa27ad --- /dev/null +++ b/fearless_simd_core/src/x86/mod.rs @@ -0,0 +1,15 @@ +//! Target feature tokens for the x86 and x86-64 CPU families. +//! +//! The general compuotation CPU features associated with each [microarchitecture level] can +//! be found in their corresponding modules: +//! +//! - [`v1`] for x86-64-v1. +//! - [`v2`] for x86-64-v2. +//! - [`v3`] for x86-64-v3. +//! - [`v4`] for x86-64-v4. +//! +//! 
Tokens for target features which not associated with these levels can be found in this module. +//! +//! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels + +pub mod v1; diff --git a/fearless_simd_core/src/x86/v1/fxsr.rs b/fearless_simd_core/src/x86/v1/fxsr.rs new file mode 100644 index 000000000..a4c99085d --- /dev/null +++ b/fearless_simd_core/src/x86/v1/fxsr.rs @@ -0,0 +1,80 @@ +//! The FXSR target feature. + +use core::fmt::Debug; + +use crate::{TargetFeatureToken, trampoline}; + +/// A token indicating that the current CPU has the FXSR target feature. +/// +/// The Rust target feature name for this feature is `fxsr`. +/// For example, this can be used to [`trampoline!`] into: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr")] +/// fn uses_fxsr() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fxsr { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Fxsr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fxsr" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Fxsr { + const FEATURES: &[&str] = &["fxsr"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + trampoline!([Self = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fxsr { + #[cfg(feature = "std")] + /// Create a new token if the "fxsr" target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fxsr") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fxsr")] + /// Create a new token for the "fxsr" target feature is enabled. + /// + /// This method is useful to get a new token if you have an + /// external proof that FXSR is available. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr" target feature is available. + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs new file mode 100644 index 000000000..0e360b506 --- /dev/null +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -0,0 +1,38 @@ +//! Target features enabled in the `x86-64-v1` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This is the baseline for x86-64 support. + +mod sse; +pub use sse::Sse; + +mod fxsr; +pub use fxsr::Fxsr; + +/// A token that the current CPU is on the x86-64-v1 microarchitecture level. +// TODO: (This is currently incomplete) +pub struct V1 { + pub sse: Sse, + pub fxsr: Fxsr, +} + +impl V1 { + /// Create a new token if the current CPU is at the x86-64-v1 microarchitecture level or better. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + #[cfg(feature = "std")] + pub fn try_new() -> Option { + // TODO: Caching + Some(Self { + fxsr: Fxsr::try_new()?, + sse: Sse::try_new()?, + }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v1/sse.rs b/fearless_simd_core/src/x86/v1/sse.rs new file mode 100644 index 000000000..67f2160cd --- /dev/null +++ b/fearless_simd_core/src/x86/v1/sse.rs @@ -0,0 +1,90 @@ +//! The SSE target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// A token indicating that the current CPU has the SSE target feature. +/// +/// The Rust target feature name for this feature is `sse`. +/// +/// See for more information about these instructions. +/// This feature also implictily enables +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse")] +/// fn uses_sse() { +/// // ... +/// } +/// ``` +/// +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse { + const FEATURES: &[&str] = &["sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse { + #[cfg(feature = "std")] + /// Create a new token if the `"sse"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse")] + /// Create a new token for the "sse" target feature is enabled. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE is available. This could happen if you have a token for a target feature + /// which [implicitly enables] `sse`. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; From 78d23d8f79ea48e047018e77568f1da06ab03443 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 14:43:24 +0100 Subject: [PATCH 02/19] Remove old v1 items --- fearless_simd_core/src/x86/v1/fxsr.rs | 80 ------------------------ fearless_simd_core/src/x86/v1/sse.rs | 90 --------------------------- 2 files changed, 170 deletions(-) delete mode 100644 fearless_simd_core/src/x86/v1/fxsr.rs delete mode 100644 fearless_simd_core/src/x86/v1/sse.rs diff --git a/fearless_simd_core/src/x86/v1/fxsr.rs b/fearless_simd_core/src/x86/v1/fxsr.rs deleted file mode 100644 index a4c99085d..000000000 --- a/fearless_simd_core/src/x86/v1/fxsr.rs +++ /dev/null @@ -1,80 +0,0 @@ -//! The FXSR target feature. - -use core::fmt::Debug; - -use crate::{TargetFeatureToken, trampoline}; - -/// A token indicating that the current CPU has the FXSR target feature. -/// -/// The Rust target feature name for this feature is `fxsr`. 
-/// For example, this can be used to [`trampoline!`] into: -/// -/// ```rust -/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -/// #[target_feature(enable = "fxsr")] -/// fn uses_fxsr() { -/// // ... -/// } -/// ``` -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub struct Fxsr { - // We don't use non_exhaustive because we don't want this struct to be constructible. - // in different modules in this crate. - _private: (), -} - -impl Debug for Fxsr { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, r#""fxsr" enabled."#) - } -} - -unsafe impl TargetFeatureToken for Fxsr { - const FEATURES: &[&str] = &["fxsr"]; - - #[inline(always)] - fn vectorize(self, f: impl FnOnce() -> R) -> R { - trampoline!([Self = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) - } -} - -impl Fxsr { - #[cfg(feature = "std")] - /// Create a new token if the "fxsr" target feature is detected as enabled. - /// - /// This does not do any caching internally, although note that the standard - /// library does internally cache the features it detects. - // TODO: Consider a manual override feature/env var? - pub fn try_new() -> Option { - // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - if std::arch::is_x86_feature_detected!("fxsr") { - // Safety: The required CPU feature was detected. - unsafe { Some(Self::new()) } - } else { - None - } - } - - #[target_feature(enable = "fxsr")] - /// Create a new token for the "fxsr" target feature is enabled. - /// - /// This method is useful to get a new token if you have an - /// external proof that FXSR is available. - /// - /// # Safety - /// - /// No conditions other than those inherited from the target feature attribute, - /// i.e. that the "fxsr" target feature is available. 
- pub fn new() -> Self { - Self { _private: () } - } -} - -const _: () = { - assert!( - core::mem::size_of::() == 0, - "Target feature tokens should be zero sized." - ); -}; diff --git a/fearless_simd_core/src/x86/v1/sse.rs b/fearless_simd_core/src/x86/v1/sse.rs deleted file mode 100644 index 67f2160cd..000000000 --- a/fearless_simd_core/src/x86/v1/sse.rs +++ /dev/null @@ -1,90 +0,0 @@ -//! The SSE target feature. - -use crate::{TargetFeatureToken, trampoline}; - -use core::fmt::Debug; - -/// A token indicating that the current CPU has the SSE target feature. -/// -/// The Rust target feature name for this feature is `sse`. -/// -/// See for more information about these instructions. -/// This feature also implictily enables -/// -/// # Example -/// -/// This can be used to [`trampoline!`] into: -/// -/// ```rust -/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -/// #[target_feature(enable = "sse")] -/// fn uses_sse() { -/// // ... -/// } -/// ``` -/// -#[derive(Copy, Clone, Hash, PartialEq, Eq)] -pub struct Sse { - // We don't use non_exhaustive because we don't want this struct to be constructible. - // in different modules in this crate. - _private: (), -} - -impl Debug for Sse { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - write!(f, r#""sse" enabled."#) - } -} - -unsafe impl TargetFeatureToken for Sse { - const FEATURES: &[&str] = &["sse"]; - - #[inline(always)] - fn vectorize(self, f: impl FnOnce() -> R) -> R { - trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) - } -} - -impl Sse { - #[cfg(feature = "std")] - /// Create a new token if the `"sse"` target feature is detected as enabled. - /// - /// This does not do any caching internally, although note that the standard - /// library does internally cache the features it detects. - // TODO: Consider a manual override feature/env var? - pub fn try_new() -> Option { - // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? - #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - if std::arch::is_x86_feature_detected!("sse") { - // Safety: The required CPU feature was detected. - unsafe { Some(Self::new()) } - } else { - None - } - } - - #[target_feature(enable = "sse")] - /// Create a new token for the "sse" target feature is enabled. - /// - /// This method is useful to get a new token if you have an external proof that - /// SSE is available. This could happen if you have a token for a target feature - /// which [implicitly enables] `sse`. - /// - /// # Safety - /// - /// No conditions other than those inherited from the target feature attribute, - /// i.e. that the "sse" target feature is available. - /// - /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 - pub fn new() -> Self { - Self { _private: () } - } -} - -const _: () = { - assert!( - core::mem::size_of::() == 0, - "Target feature tokens should be zero sized." - ); -}; From 9cfa3345c085a88997f73a115428b1d723461e35 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 14:43:37 +0100 Subject: [PATCH 03/19] Improve the generator --- fearless_simd_core/gen/src/data/x86.rs | 170 ++++++++++++++++-------- fearless_simd_core/gen/src/main.rs | 38 ++++-- fearless_simd_core/gen/templates/x86.rs | 14 +- 3 files changed, 144 insertions(+), 78 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 39e59990a..346a9f79f 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -2,7 +2,7 @@ use crate::Feature; macro_rules! 
f { ($(#[doc = $doc_addition: literal])* - struct ::$module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] + struct $module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] fn $example_function_name: ident $($additional_impls: tt)* ) => { @@ -29,339 +29,397 @@ pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs"); pub(crate) const X86_FEATURES: &[Feature] = &[ f!( /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions + /// /// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX - struct ::adx::Adx("ADX"): "adx" + [] + struct adx::Adx("ADX"): "adx" + [] fn uses_adx ), f!( /// [AES] --- Advanced Encryption Standard + /// /// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set - struct ::crypto::Aes("AES"): "aes" + ["sse2"] + struct crypto::Aes("AES"): "aes" + ["sse2"] fn uses_aes ), f!( /// [AVX] --- Advanced Vector Extensions + /// /// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions - struct ::avx::Avx("AVX"): "avx" + ["sse4.2"] + struct avx::Avx("AVX"): "avx" + ["sse4.2"] fn uses_avx ), f!( /// [AVX2] --- Advanced Vector Extensions 2 + /// /// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 - struct ::avx::Avx2("AVX2"): "avx2" + ["avx"] + struct avx::Avx2("AVX2"): "avx2" + ["avx"] fn uses_avx2 ), f!( /// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions + /// /// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 - struct ::avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"] + struct avx512::Avx512bf16("AVX512-BF16"): "avx512bf16" + ["avx512bw"] fn uses_avx512bf16 ), f!( /// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms + /// + /// /// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG - struct ::avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"] + struct 
avx512::Avx512bitalg("AVX512-BITALG"): "avx512bitalg" + ["avx512bw"] fn uses_avx512bitalg ), f!( /// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions + /// /// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI - struct ::avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"] + struct avx512::Avx512bw("AVX512-BW"): "avx512bw" + ["avx512f"] fn uses_avx512bw ), f!( /// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions + /// /// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection - struct ::avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"] + struct avx512::Avx512cd("AVX512-CD"): "avx512cd" + ["avx512f"] fn uses_avx512cd ), f!( /// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions + /// /// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI - struct ::avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"] + struct avx512::Avx512dq("AVX512-DQ"): "avx512dq" + ["avx512f"] fn uses_avx512dq ), f!( /// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation + /// /// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 - struct ::avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"] + struct avx512::Avx512f("AVX512-F"): "avx512f" + ["avx2", "fma", "f16c"] fn uses_avx512f ), f!( /// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions + /// /// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 - struct ::avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"] + struct avx512::Avx512fp16("AVX512-FP16"): "avx512fp16" + ["avx512bw"] fn uses_avx512fp16 ), f!( /// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add + /// /// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA - struct ::avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"] + struct avx512::Avx512ifma("AVX512-IFMA"): "avx512ifma" + ["avx512f"] fn 
uses_avx512ifma ), f!( /// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions + /// /// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI - struct ::avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"] + struct avx512::Avx512vbmi("AVX512-VBMI"): "avx512vbmi" + ["avx512bw"] fn uses_avx512vbmi ), f!( /// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 + /// /// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 - struct ::avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"] + struct avx512::Avx512vbmi2("AVX512-VBMI2"): "avx512vbmi2" + ["avx512bw"] fn uses_avx512vbmi2 ), f!( /// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions + /// /// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 - struct ::avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"] + struct avx512::Avx512vl("AVX512-VL"): "avx512vl" + ["avx512f"] fn uses_avx512vl ), f!( /// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions + /// /// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI - struct ::avx512::Avx512vnni("AVX512-VNNI"): "avx512vnni" + ["avx512f"] + struct avx512::Avx512vnni("AVX512-VNNI"): "avx512vnni" + ["avx512f"] fn uses_avx512vnni ), f!( /// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers + /// /// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT - struct ::avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"] + struct avx512::Avx512vp2intersect("AVX512-VP2INTERSECT"): "avx512vp2intersect" + ["avx512f"] fn uses_avx512vp2intersect ), f!( /// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction + /// /// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG - struct 
::avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"] + struct avx512::Avx512vpopcntdq("AVX512-VPOPCNTDQ"): "avx512vpopcntdq" + ["avx512f"] fn uses_avx512vpopcntdq ), f!( /// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add + /// /// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"] + struct avx::Avxifma("AVX-IFMA"): "avxifma" + ["avx2"] fn uses_avxifma ), f!( /// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions + /// /// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"] + struct avx::Avxneconvert("AVX-NE-CONVERT"): "avxneconvert" + ["avx2"] fn uses_avxneconvert ), f!( /// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions + /// /// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"] + struct avx::Avxvnni("AVX-VNNI"): "avxvnni" + ["avx2"] fn uses_avxvnni ), f!( /// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers + /// /// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"] + struct avx::Avxvnniint16("AVX-VNNI-INT16"): "avxvnniint16" + ["avx2"] fn uses_avxvnniint16 ), f!( /// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers + /// /// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA - struct ::avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"] + struct avx::Avxvnniint8("AVX-VNNI-INT8"): "avxvnniint8" + ["avx2"] fn uses_avxvnniint8 ), f!( /// [BMI1] --- Bit 
Manipulation Instruction Sets + /// /// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets - struct ::v3::Bmi1(" 1"): "bmi1" + [] + struct v3::Bmi1(" 1"): "bmi1" + [] fn uses_bmi1 ), f!( /// [BMI2] --- Bit Manipulation Instruction Sets 2 + /// /// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 - struct ::v3::Bmi2("BMI2"): "bmi2" + [] + struct v3::Bmi2("BMI2"): "bmi2" + [] fn uses_bmi2 ), f!( /// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically + /// /// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b - struct ::v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] + struct v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] fn uses_cmpxchg16b ), f!( /// [F16C] --- 16-bit floating point conversion instructions + /// /// [F16C]: https://en.wikipedia.org/wiki/F16C - struct ::v3::F16c("F16C"): "f16c" + ["avx"] + struct v3::F16c("F16C"): "f16c" + ["avx"] fn uses_f16c ), f!( /// [FMA3] --- Three-operand fused multiply-add + /// /// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set - struct ::v3::Fma("FMA3"): "fma" + ["avx"] + struct v3::Fma("FMA3"): "fma" + ["avx"] fn uses_fma ), f!( /// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State + /// /// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave, - struct ::sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] + /// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor, + struct sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] fn uses_fxsr ), f!( /// [GFNI] --- Galois Field New Instructions + /// /// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI - struct ::crypto::Gfni("GFNI"): "gfni" + ["sse2"] + struct crypto::Gfni("GFNI"): "gfni" + ["sse2"] fn uses_gfni ), f!( /// [KEYLOCKER] --- Intel Key Locker Instructions + /// /// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions - struct ::crypto::Keylocker("KEYLOCKER"): "kl" + [] 
+ struct crypto::Keylocker("KEYLOCKER"): "kl" + [] fn uses_keylocker ), f!( /// ["lzcnt"] --- Leading zeros count + /// /// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt - struct ::v3::Lzcnt("`lzcnt`"): "lzcnt" + [] + struct v3::Lzcnt("`lzcnt`"): "lzcnt" + [] fn uses_lzcnt ), f!( /// ["movbe"] --- Move data after swapping bytes + /// /// ["movbe"]: https://www.felixcloutier.com/x86/movbe - struct ::v3::Movbe("`movbe`"): "movbe" + [] + struct v3::Movbe("`movbe`"): "movbe" + [] fn uses_movbe ), f!( /// ["pclmulqdq"] --- Packed carry-less multiplication quadword + /// /// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq - struct ::crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] + struct crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] fn uses_pclmulqdq ), f!( /// ["popcnt"] --- Count of bits set to 1 + /// /// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt - struct ::v2::Popcnt("`popcnt`"): "popcnt" + [] + struct v2::Popcnt("`popcnt`"): "popcnt" + [] fn uses_popcnt ), f!( /// ["rdrand"] --- Read random number + /// /// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand - struct ::crypto::Rdrand("`rdrand`"): "rdrand" + [] + struct crypto::Rdrand("`rdrand`"): "rdrand" + [] fn uses_rdrand ), f!( /// ["rdseed"] --- Read random seed + /// /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand - struct ::crypto::Rdseed("`rdseed"): "rdseed" + [] + struct crypto::Rdseed("`rdseed"): "rdseed" + [] fn uses_rdseed ), f!( /// [SHA] --- Secure Hash Algorithm + /// /// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions - struct ::crypto::Sha("SHA"): "sha" + ["sse2"] + struct crypto::Sha("SHA"): "sha" + ["sse2"] fn uses_sha ), f!( /// [SHA512] --- Secure Hash Algorithm with 512-bit digest + /// /// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions - struct ::crypto::Sha512("SHA512"): "sha512" + ["avx2"] + struct crypto::Sha512("SHA512"): "sha512" + ["avx2"] fn uses_sha512 ), f!( /// [SM3] --- ShangMi 3 Hash Algorithm + /// /// 
[SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions - struct ::crypto::Sm3("SM3"): "sm3" + ["avx"] + struct crypto::Sm3("SM3"): "sm3" + ["avx"] fn uses_sm3 ), f!( /// [SM4] --- ShangMi 4 Cipher Algorithm + /// /// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions - struct ::crypto::Sm4("SM4"): "sm4" + ["avx2"] + struct crypto::Sm4("SM4"): "sm4" + ["avx2"] fn uses_sm4 ), f!( /// [SSE] --- Streaming SIMD Extensions + /// /// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions - struct ::sse::Sse("SSE"): "sse" + [] + struct sse::Sse("SSE"): "sse" + [] fn uses_sse ), f!( /// [SSE2] --- Streaming SIMD Extensions 2 + /// /// [SSE2]: https://en.wikipedia.org/wiki/SSE2 - struct ::sse::Sse2("SSE2"): "sse2" + ["sse"] + struct sse::Sse2("SSE2"): "sse2" + ["sse"] fn uses_sse2 ), f!( /// [SSE3] --- Streaming SIMD Extensions 3 + /// /// [SSE3]: https://en.wikipedia.org/wiki/SSE3 - struct ::sse::Sse3("SSE3"): "sse3" + ["sse2"] + struct sse::Sse3("SSE3"): "sse3" + ["sse2"] fn uses_sse3 ), f!( /// [SSE4.1] --- Streaming SIMD Extensions 4.1 + /// /// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 - struct ::sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"] + struct sse::Sse4_1("SSE4.1"): "sse4.1" + ["ssse3"] fn uses_sse4 ), f!( /// [SSE4.2] --- StreamingSIMDExtensions 4.2 + /// /// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 - struct ::sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] + struct sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] fn uses_sse4 ), // // TODO: This only exists from 1.91 and above (current beta) // f!( // /// [SSE4a] --- StreamingSIMDExtensions 4a + // /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a // struct Sse4a("SSE4a"): "sse4a" + ["sse3"] // fn uses_sse4a // ), f!( /// [SSSE3] --- Supplemental StreamingSIMDExtensions 3 + /// /// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 - struct ::sse::SupplementalSse3("SSSE3"): "ssse3" + 
["sse3"] + struct sse::SupplementalSse3("SSSE3"): "ssse3" + ["sse3"] fn uses_ssse3 ), f!( /// [TBM] --- Trailing Bit Manipulation + /// /// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) - struct ::discontinued::Tbm("TBM"): "tbm" + [] + struct discontinued::Tbm("TBM"): "tbm" + [] fn uses_tbm ), f!( /// [VAES] --- Vector AES Instructions + /// /// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES - struct ::crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"] + struct crypto::Vaes("VAES"): "vaes" + ["avx2", "aes"] fn uses_vaes ), f!( /// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords + /// /// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ - struct ::crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"] + struct crypto::Vpclmulqdq("VPCLMULQDQ"): "vpclmulqdq" + ["avx", "pclmulqdq"] fn uses_vpclmulqdq ), f!( /// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions + /// /// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions - struct ::crypto::WideKeylocker("KEYLOCKER_WIDE"): "widekl" + ["kl"] + struct crypto::WideKeylocker("WIDE KEYLOCKER"): "widekl" + ["kl"] fn uses_wide_keylocker ), f!( /// [`xsave`] --- Save processor extended states + /// /// ["xsave"]: https://www.felixcloutier.com/x86/xsave - struct ::xsave::Xsave("`xsave`"): "xsave" + [] + struct xsave::Xsave("`xsave`"): "xsave" + [] fn uses_xsave ), f!( /// ["xsavec"] --- Save processor extended states with compaction + /// /// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec - struct ::xsave::Xsavec("`xsavec`"): "xsavec" + [] + struct xsave::Xsavec("`xsavec`"): "xsavec" + [] fn uses_xsavec ), f!( /// ["xsaveopt"] --- Save processor extended states optimized + /// /// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt - struct ::xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] + struct xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] fn 
uses_xsaveopt ), f!( /// ["xsaves"] --- Save processor extended states supervisor + /// /// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves - struct ::xsave::Xsaves("`xsaves`"): "xsaves" + [] + struct xsave::Xsaves("`xsaves`"): "xsaves" + [] fn uses_xsaves ), ]; diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index 2961bd33c..1c0c8d430 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -1,6 +1,6 @@ mod data; -use std::fmt::{Write, format}; +use std::fmt::Write; use std::fs; use std::{ cell::RefCell, @@ -29,8 +29,16 @@ fn generate_for_arch( for line in feature.feature.extra_docs.lines() { writeln!(&mut new_docs, "///{line}").unwrap(); } - let enabled_feature_docs = format!("`{}`", feature.children.join("`, `")); - let enabled_feature_str_list = format!(r#""{}""#, feature.children.join(r#"", ""#)); + let enabled_feature_str_list = format!( + r#""{}", {}"#, + feature.feature.feature_name, + feature + .children + .iter() + .map(|it| format!(r#""{it}""#)) + .collect::>() + .join(", ") + ); let mut from_impls = String::new(); for child in &feature.children { let from_feature = features @@ -43,13 +51,13 @@ fn generate_for_arch( ); write!( from_impls, - r#"\n\ - impl From for {type_path} {{ - fn from(value: Self) -> {type_path} {{ - trampoline!([Self = value] => "{{FEATURE_ID}}", fn() -> {type_path} {{ {{type_path}}::new() }}) - }} - }}\n - "# + "\n\ +impl From for {type_path} {{ + fn from(value: FEATURE_STRUCT_NAME) -> Self {{ + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([FEATURE_STRUCT_NAME = value] => \"{{FEATURE_ID}}\", fn() -> {type_path} {{ {type_path}::new() }}) + }} +}}\n" ).unwrap(); } let mut result = format!( @@ -60,18 +68,20 @@ fn generate_for_arch( // We replace the from impls first, as they use template variables from the rest of this. 
result = result.replace("/*{FROM_IMPLS}*/", &from_impls); result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name); - result = result.replace("/// {NEW_DOCS}", &new_docs); + result = result.replace("/// {NEW_DOCS}\n", &new_docs); result = result.replace("{FEATURE_ID}", feature.feature.feature_name); - result = result.replace("{ENABLED_FEATURES_DOCS_LIST}", &enabled_feature_docs); result = result.replace( "{EXAMPLE_FUNCTION_NAME}", feature.feature.example_function_name, ); result = result.replace("FEATURE_STRUCT_NAME", feature.feature.struct_name); - result = result.replace("{ENABLED_FEATURES_STR_LIST}", &enabled_feature_str_list); + result = result.replace( + r#""{ENABLED_FEATURES_STR_LIST}""#, + &enabled_feature_str_list, + ); let module_dir = arch_dir.join(feature.feature.module); create_dir_all(&module_dir)?; - let mut file = module_dir.join(feature.feature.feature_name); + let mut file = module_dir.join(feature.feature.feature_name.replace(".", "_")); file.set_extension("rs"); fs::write(file, result)?; } diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index a37207a37..48a294d68 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -8,12 +8,9 @@ use core::fmt::Debug; /// /// A token indicating that the current CPU has the `{FEATURE_ID}` target feature. /// -/// This feature also enables {ENABLED_FEATURES_DOCS_LIST}; -/// the tokens for these features can be created using [`From`] implementations. 
-/// /// # Example /// -/// This can be used to [`trampoline!`] into: +/// This can be used to [`trampoline!`] into functions like: /// /// ```rust /// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -35,16 +32,17 @@ impl Debug for FEATURE_STRUCT_NAME { } } -unsafe impl TargetFeatureToken for Sse { - const FEATURES: &[&str] = &[{ ENABLED_FEATURES_STR_LIST }]; +unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME { + const FEATURES: &[&str] = &["{ENABLED_FEATURES_STR_LIST}"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - trampoline!([Self = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + // Because we want this constant to be eagerly evaluated. + trampoline!([FEATURE_STRUCT_NAME = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } -impl Sse { +impl FEATURE_STRUCT_NAME { #[cfg(feature = "std")] /// Create a new token if the `"{FEATURE_ID}"` target feature is detected as enabled. /// From 594a525b65eb2444f5fc09041a67a1c92675e8cd Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 14:50:50 +0100 Subject: [PATCH 04/19] Minor fixups in the generator Also removes unused additional impl support --- fearless_simd_core/gen/src/data/x86.rs | 4 +--- fearless_simd_core/gen/src/main.rs | 3 --- fearless_simd_core/gen/templates/x86.rs | 6 +++--- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 346a9f79f..52c13c4df 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -4,7 +4,6 @@ macro_rules! 
f { ($(#[doc = $doc_addition: literal])* struct $module: ident:: $struct_name: ident($display_name: literal): $feature_name: literal + [$($implicitly_enabled: literal),*] fn $example_function_name: ident - $($additional_impls: tt)* ) => { Feature { struct_name: stringify!($struct_name), @@ -13,7 +12,6 @@ macro_rules! f { extra_docs: concat!($($doc_addition, "\n",)*), example_function_name: stringify!($example_function_name), feature_docs_name: $display_name, - additional_impls: stringify!($($additional_impls)*), module: stringify!($module) } } @@ -285,7 +283,7 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ /// ["rdseed"] --- Read random seed /// /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand - struct crypto::Rdseed("`rdseed"): "rdseed" + [] + struct crypto::Rdseed("`rdseed`"): "rdseed" + [] fn uses_rdseed ), f!( diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index 1c0c8d430..aec7057f4 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -104,9 +104,6 @@ struct Feature { example_function_name: &'static str, /// The "display name" for the feature, used inside the docs. feature_docs_name: &'static str, - /// Extra code added at the end. - /// Used for implicitly enabled features. - additional_impls: &'static str, /// The module (if any) this feature will belong to. /// /// (Note that imports into the module are checked to exist, but not automatically inserted). diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 48a294d68..7742b8e98 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -62,16 +62,16 @@ impl FEATURE_STRUCT_NAME { } #[target_feature(enable = "{FEATURE_ID}")] - /// Create a new token for the "{FEATURE_ID}" target feature is enabled. + /// Create a new token for the "{FEATURE_ID}" target feature. 
/// /// This method is useful to get a new token if you have an external proof that - /// {FEATURE_ID} is available. This could happen if you are in a target feature + /// {FEATURE_DOCS_NAME} is available. This could happen if you are in a target feature /// function called by an external library user. /// /// # Safety /// /// No conditions other than those inherited from the target feature attribute, - /// i.e. that the "{FEATURE_DOCS_NAME}" target feature is available. + /// i.e. that the "{FEATURE_ID}" target feature is available. /// /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions pub fn new() -> Self { From 7b595b44f51bcb713bed15984c0be0421f28c9d7 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:51:21 +0100 Subject: [PATCH 05/19] Add the generated x86 code --- fearless_simd_core/src/x86/adx/adx.rs | 92 +++++++++ fearless_simd_core/src/x86/adx/mod.rs | 8 + fearless_simd_core/src/x86/avx/avx.rs | 134 ++++++++++++ fearless_simd_core/src/x86/avx/avx2.rs | 143 +++++++++++++ fearless_simd_core/src/x86/avx/avxifma.rs | 150 ++++++++++++++ .../src/x86/avx/avxneconvert.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/avx/avxvnni.rs | 150 ++++++++++++++ .../src/x86/avx/avxvnniint16.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/avx/avxvnniint8.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/avx/mod.rs | 24 +++ .../src/x86/avx512/avx512bf16.rs | 190 +++++++++++++++++ .../src/x86/avx512/avx512bitalg.rs | 191 ++++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512bw.rs | 172 ++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512cd.rs | 172 ++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512dq.rs | 172 ++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512f.rs | 164 +++++++++++++++ .../src/x86/avx512/avx512fp16.rs | 190 +++++++++++++++++ 
.../src/x86/avx512/avx512ifma.rs | 182 +++++++++++++++++ .../src/x86/avx512/avx512vbmi.rs | 190 +++++++++++++++++ .../src/x86/avx512/avx512vbmi2.rs | 190 +++++++++++++++++ fearless_simd_core/src/x86/avx512/avx512vl.rs | 172 ++++++++++++++++ .../src/x86/avx512/avx512vnni.rs | 182 +++++++++++++++++ .../src/x86/avx512/avx512vp2intersect.rs | 182 +++++++++++++++++ .../src/x86/avx512/avx512vpopcntdq.rs | 182 +++++++++++++++++ fearless_simd_core/src/x86/avx512/mod.rs | 41 ++++ fearless_simd_core/src/x86/crypto/aes.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/gfni.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/kl.rs | 92 +++++++++ fearless_simd_core/src/x86/crypto/mod.rs | 38 ++++ .../src/x86/crypto/pclmulqdq.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/rdrand.rs | 92 +++++++++ fearless_simd_core/src/x86/crypto/rdseed.rs | 92 +++++++++ fearless_simd_core/src/x86/crypto/sha.rs | 106 ++++++++++ fearless_simd_core/src/x86/crypto/sha512.rs | 150 ++++++++++++++ fearless_simd_core/src/x86/crypto/sm3.rs | 143 +++++++++++++ fearless_simd_core/src/x86/crypto/sm4.rs | 150 ++++++++++++++ fearless_simd_core/src/x86/crypto/vaes.rs | 157 ++++++++++++++ .../src/x86/crypto/vpclmulqdq.rs | 158 +++++++++++++++ fearless_simd_core/src/x86/crypto/widekl.rs | 99 +++++++++ .../src/x86/discontinued/mod.rs | 3 + .../src/x86/discontinued/tbm.rs | 93 +++++++++ fearless_simd_core/src/x86/mod.rs | 8 + fearless_simd_core/src/x86/sse/fxsr.rs | 93 +++++++++ fearless_simd_core/src/x86/sse/mod.rs | 24 +++ fearless_simd_core/src/x86/sse/sse.rs | 92 +++++++++ fearless_simd_core/src/x86/sse/sse2.rs | 99 +++++++++ fearless_simd_core/src/x86/sse/sse3.rs | 106 ++++++++++ fearless_simd_core/src/x86/sse/sse4_1.rs | 120 +++++++++++ fearless_simd_core/src/x86/sse/sse4_2.rs | 127 ++++++++++++ fearless_simd_core/src/x86/sse/ssse3.rs | 113 +++++++++++ fearless_simd_core/src/x86/v1/mod.rs | 9 +- fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 92 +++++++++ fearless_simd_core/src/x86/v2/mod.rs | 5 
+ fearless_simd_core/src/x86/v2/popcnt.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/bmi1.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/bmi2.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/f16c.rs | 143 +++++++++++++ fearless_simd_core/src/x86/v3/fma.rs | 143 +++++++++++++ fearless_simd_core/src/x86/v3/lzcnt.rs | 92 +++++++++ fearless_simd_core/src/x86/v3/mod.rs | 17 ++ fearless_simd_core/src/x86/v3/movbe.rs | 92 +++++++++ fearless_simd_core/src/x86/xsave/xsave.rs | 93 +++++++++ fearless_simd_core/src/x86/xsave/xsavec.rs | 93 +++++++++ fearless_simd_core/src/x86/xsave/xsaveopt.rs | 93 +++++++++ fearless_simd_core/src/x86/xsave/xsaves.rs | 93 +++++++++ 65 files changed, 7455 insertions(+), 6 deletions(-) create mode 100644 fearless_simd_core/src/x86/adx/adx.rs create mode 100644 fearless_simd_core/src/x86/adx/mod.rs create mode 100644 fearless_simd_core/src/x86/avx/avx.rs create mode 100644 fearless_simd_core/src/x86/avx/avx2.rs create mode 100644 fearless_simd_core/src/x86/avx/avxifma.rs create mode 100644 fearless_simd_core/src/x86/avx/avxneconvert.rs create mode 100644 fearless_simd_core/src/x86/avx/avxvnni.rs create mode 100644 fearless_simd_core/src/x86/avx/avxvnniint16.rs create mode 100644 fearless_simd_core/src/x86/avx/avxvnniint8.rs create mode 100644 fearless_simd_core/src/x86/avx/mod.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512bf16.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512bitalg.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512bw.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512cd.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512dq.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512f.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512fp16.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512ifma.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vbmi.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vbmi2.rs create mode 
100644 fearless_simd_core/src/x86/avx512/avx512vl.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vnni.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs create mode 100644 fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs create mode 100644 fearless_simd_core/src/x86/avx512/mod.rs create mode 100644 fearless_simd_core/src/x86/crypto/aes.rs create mode 100644 fearless_simd_core/src/x86/crypto/gfni.rs create mode 100644 fearless_simd_core/src/x86/crypto/kl.rs create mode 100644 fearless_simd_core/src/x86/crypto/mod.rs create mode 100644 fearless_simd_core/src/x86/crypto/pclmulqdq.rs create mode 100644 fearless_simd_core/src/x86/crypto/rdrand.rs create mode 100644 fearless_simd_core/src/x86/crypto/rdseed.rs create mode 100644 fearless_simd_core/src/x86/crypto/sha.rs create mode 100644 fearless_simd_core/src/x86/crypto/sha512.rs create mode 100644 fearless_simd_core/src/x86/crypto/sm3.rs create mode 100644 fearless_simd_core/src/x86/crypto/sm4.rs create mode 100644 fearless_simd_core/src/x86/crypto/vaes.rs create mode 100644 fearless_simd_core/src/x86/crypto/vpclmulqdq.rs create mode 100644 fearless_simd_core/src/x86/crypto/widekl.rs create mode 100644 fearless_simd_core/src/x86/discontinued/mod.rs create mode 100644 fearless_simd_core/src/x86/discontinued/tbm.rs create mode 100644 fearless_simd_core/src/x86/sse/fxsr.rs create mode 100644 fearless_simd_core/src/x86/sse/mod.rs create mode 100644 fearless_simd_core/src/x86/sse/sse.rs create mode 100644 fearless_simd_core/src/x86/sse/sse2.rs create mode 100644 fearless_simd_core/src/x86/sse/sse3.rs create mode 100644 fearless_simd_core/src/x86/sse/sse4_1.rs create mode 100644 fearless_simd_core/src/x86/sse/sse4_2.rs create mode 100644 fearless_simd_core/src/x86/sse/ssse3.rs create mode 100644 fearless_simd_core/src/x86/v2/cmpxchg16b.rs create mode 100644 fearless_simd_core/src/x86/v2/mod.rs create mode 100644 fearless_simd_core/src/x86/v2/popcnt.rs create mode 100644 
fearless_simd_core/src/x86/v3/bmi1.rs create mode 100644 fearless_simd_core/src/x86/v3/bmi2.rs create mode 100644 fearless_simd_core/src/x86/v3/f16c.rs create mode 100644 fearless_simd_core/src/x86/v3/fma.rs create mode 100644 fearless_simd_core/src/x86/v3/lzcnt.rs create mode 100644 fearless_simd_core/src/x86/v3/mod.rs create mode 100644 fearless_simd_core/src/x86/v3/movbe.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsave.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsavec.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsaveopt.rs create mode 100644 fearless_simd_core/src/x86/xsave/xsaves.rs diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs new file mode 100644 index 000000000..b10b63790 --- /dev/null +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The ADX target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [ADX] --- Multi-Precision Add-Carry Instruction Extensions +/// +/// [ADX]: https://en.wikipedia.org/wiki/Intel_ADX +/// +/// A token indicating that the current CPU has the `adx` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "adx")] +/// fn uses_adx() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Adx { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Adx { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""adx" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Adx { + const FEATURES: &[&str] = &["adx"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Adx = self] => "adx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Adx { + #[cfg(feature = "std")] + /// Create a new token if the `"adx"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("adx") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "adx")] + /// Create a new token for the "adx" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// ADX is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "adx" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs new file mode 100644 index 000000000..3c74dc60a --- /dev/null +++ b/fearless_simd_core/src/x86/adx/mod.rs @@ -0,0 +1,8 @@ +//! The "adx" target feature. + +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod adx; +pub use adx::Adx; diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs new file mode 100644 index 000000000..baa56e369 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -0,0 +1,134 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX] --- Advanced Vector Extensions +/// +/// [AVX]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions +/// +/// A token indicating that the current CPU has the `avx` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx")] +/// fn uses_avx() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx { + const FEATURES: &[&str] = &["avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Avx = self] => "avx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx { + #[cfg(feature = "std")] + /// Create a new token if the `"avx"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx")] + /// Create a new token for the "avx" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx = value] => "avx", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs new file mode 100644 index 000000000..b6f252bf5 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX2] --- Advanced Vector Extensions 2 +/// +/// [AVX2]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX2 +/// +/// A token indicating that the current CPU has the `avx2` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx2")] +/// fn uses_avx2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx2 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx2 { + const FEATURES: &[&str] = &[ + "avx2", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx2 = self] => "avx2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx2 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx2")] + /// Create a new token for the "avx2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX2 is available. This could happen if you are in a target feature + /// function called by an external library user.
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx2 = value] => "avx2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs new file mode 100644 index 000000000..12fda758a --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-IFMA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-IFMA] --- Advanced Vector Extensions - Integer Fused Multiply Add +/// +/// [AVX-IFMA]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxifma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxifma")] +/// fn uses_avxifma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxifma { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate.
+ _private: (), +} + +impl Debug for Avxifma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxifma" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxifma { + const FEATURES: &[&str] = &[ + "avxifma", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxifma = self] => "avxifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxifma { + #[cfg(feature = "std")] + /// Create a new token if the `"avxifma"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxifma") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxifma")] + /// Create a new token for the "avxifma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-IFMA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxifma" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxifma = value] => "avxifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs new file mode 100644 index 000000000..a2adef3c8 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-NE-CONVERT target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-NE-CONVERT] --- Advanced Vector Extensions - No-Exception Floating-Point conversion Instructions +/// +/// [AVX-NE-CONVERT]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxneconvert` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxneconvert")] +/// fn uses_avxneconvert() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxneconvert { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxneconvert { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxneconvert" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxneconvert { + const FEATURES: &[&str] = &[ + "avxneconvert", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxneconvert = self] => "avxneconvert", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxneconvert { + #[cfg(feature = "std")] + /// Create a new token if the `"avxneconvert"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxneconvert") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxneconvert")] + /// Create a new token for the "avxneconvert" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-NE-CONVERT is available. This could happen if you are in a target feature + /// function called by an external library user.
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxneconvert" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxneconvert) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxneconvert = value] => "avxneconvert", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs new file mode 100644 index 000000000..48148a0c0 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-VNNI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI] --- Advanced Vector Extensions - Vector Neural Network Instructions +/// +/// [AVX-VNNI]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnni` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnni")] +/// fn uses_avxvnni() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnni { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnni" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxvnni { + const FEATURES: &[&str] = &[ + "avxvnni", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxvnni = self] => "avxvnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnni { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnni")] + /// Create a new token for the "avxvnni" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI is available.
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnni = value] => "avxvnni", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs new file mode 100644 index 000000000..36b16a412 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-VNNI-INT16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI-INT16] --- Advanced Vector Extensions - Vector Neural Network Instructions with 16-bit Integers +/// +/// [AVX-VNNI-INT16]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnniint16` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnniint16")] +/// fn uses_avxvnniint16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnniint16 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnniint16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnniint16" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxvnniint16 { + const FEATURES: &[&str] = &[ + "avxvnniint16", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxvnniint16 = self] => "avxvnniint16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnniint16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnniint16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnniint16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnniint16")] + /// Create a new token for the "avxvnniint16" target feature.
+ /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI-INT16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnniint16" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnniint16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint16 = value] => "avxvnniint16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs new file mode 100644 index 000000000..2a0eaf4a4 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX-VNNI-INT8 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX-VNNI-INT8] --- Advanced Vector Extensions - Vector Neural Network Instructions with 8-bit Integers +/// +/// [AVX-VNNI-INT8]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#AVX-VNNI,_AVX-IFMA +/// +/// A token indicating that the current CPU has the `avxvnniint8` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avxvnniint8")] +/// fn uses_avxvnniint8() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avxvnniint8 { + // We don't use non_exhaustive because we don't want this struct to be constructible + // in different modules in this crate. + _private: (), +} + +impl Debug for Avxvnniint8 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avxvnniint8" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avxvnniint8 { + const FEATURES: &[&str] = &[ + "avxvnniint8", + "avx", + "avx2", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avxvnniint8 = self] => "avxvnniint8", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avxvnniint8 { + #[cfg(feature = "std")] + /// Create a new token if the `"avxvnniint8"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avxvnniint8") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avxvnniint8")] + /// Create a new token for the "avxvnniint8" target feature.
+ /// + /// This method is useful to get a new token if you have an external proof that + /// AVX-VNNI-INT8 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avxvnniint8" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avxvnniint8) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avxvnniint8 = value] => "avxvnniint8", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs new file mode 100644 index 000000000..f047d0553 --- /dev/null +++ b/fearless_simd_core/src/x86/avx/mod.rs @@ -0,0 +1,24 @@ +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." 
+)] +mod avx; +pub use avx::Avx; + +mod avx2; +pub use avx2::Avx2; + +mod avxifma; +pub use avxifma::Avxifma; + +mod avxneconvert; +pub use avxneconvert::Avxneconvert; + +mod avxvnni; +pub use avxvnni::Avxvnni; + +mod avxvnniint8; +pub use avxvnniint8::Avxvnniint8; + +mod avxvnniint16; +pub use avxvnniint16::Avxvnniint16; diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs new file mode 100644 index 000000000..523969729 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-BF16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BF16] --- Advanced Vector Extensions 512-bit - Bfloat16 Extensions +/// +/// [AVX512-BF16]: https://en.wikipedia.org/wiki/AVX-512#BF16 +/// +/// A token indicating that the current CPU has the `avx512bf16` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bf16")] +/// fn uses_avx512bf16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bf16 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avx512bf16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bf16" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512bf16 { + const FEATURES: &[&str] = &[ + "avx512bf16", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512bf16 = self] => "avx512bf16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bf16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bf16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bf16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bf16")] + /// Create a new token for the "avx512bf16" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BF16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bf16" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bf16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bf16 = value] => "avx512bf16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs new file mode 100644 index 000000000..66bb543ad --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -0,0 +1,191 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-BITALG target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BITALG] --- Advanced Vector Extensions 512-bit - Bit Algorithms +/// +/// +/// [AVX512-BITALG]: https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG +/// +/// A token indicating that the current CPU has the `avx512bitalg` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bitalg")] +/// fn uses_avx512bitalg() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bitalg { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512bitalg { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bitalg" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512bitalg { + const FEATURES: &[&str] = &[ + "avx512bitalg", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Avx512bitalg = self] => "avx512bitalg", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bitalg { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bitalg"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bitalg") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bitalg")] + /// Create a new token for the "avx512bitalg" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BITALG is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bitalg" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bitalg) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bitalg = value] => "avx512bitalg", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs new file mode 100644 index 000000000..7213b3da7 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-BW target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-BW] --- Advanced Vector Extensions 512-bit - Byte and Word Instructions +/// +/// [AVX512-BW]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512bw` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bw")] +/// fn uses_avx512bw() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512bw { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512bw { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512bw" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512bw { + const FEATURES: &[&str] = &[ + "avx512bw", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512bw = self] => "avx512bw", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512bw { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512bw"` target feature is detected as enabled. 
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512bw") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512bw")] + /// Create a new token for the "avx512bw" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-BW is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bw" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512bw) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512bw = value] => "avx512bw", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs new file mode 100644 index 000000000..b3b9c8c36 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-CD target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-CD] --- Advanced Vector Extensions 512-bit - Conflict Detection Instructions +/// +/// [AVX512-CD]: https://en.wikipedia.org/wiki/AVX-512#Conflict_detection +/// +/// A token indicating that the current CPU has the `avx512cd` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512cd")] +/// fn uses_avx512cd() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512cd { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512cd { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512cd" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512cd { + const FEATURES: &[&str] = &[ + "avx512cd", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512cd = self] => "avx512cd", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512cd { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512cd"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512cd") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512cd")] + /// Create a new token for the "avx512cd" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-CD is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. 
that the "avx512cd" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512cd) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512cd = value] => "avx512cd", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs new file mode 100644 index 000000000..fb6d36703 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-DQ target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-DQ] --- Advanced Vector Extensions 512-bit - Doubleword and Quadword Instructions +/// +/// [AVX512-DQ]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512dq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512dq")] +/// fn uses_avx512dq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512dq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512dq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512dq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512dq { + const FEATURES: &[&str] = &[ + "avx512dq", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512dq = self] => "avx512dq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512dq { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512dq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512dq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512dq")] + /// Create a new token for the "avx512dq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-DQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512dq" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512dq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512dq = value] => "avx512dq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs new file mode 100644 index 000000000..fa6adb779 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -0,0 +1,164 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-F target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-F] --- Advanced Vector Extensions 512-bit - Foundation +/// +/// [AVX512-F]: https://en.wikipedia.org/wiki/AVX-512 +/// +/// A token indicating that the current CPU has the `avx512f` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512f")] +/// fn uses_avx512f() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512f { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512f { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512f" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512f { + const FEATURES: &[&str] = &[ + "avx512f", "avx", "avx2", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Avx512f = self] => "avx512f", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512f { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512f"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512f") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512f")] + /// Create a new token for the "avx512f" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-F is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512f" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512f) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512f = value] => "avx512f", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs new file mode 100644 index 000000000..f3ed60899 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-FP16 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-FP16] --- Advanced Vector Extensions 512-bit - Float16 Extensions +/// +/// [AVX512-FP16]: https://en.wikipedia.org/wiki/AVX-512#FP16 +/// +/// A token indicating that the current CPU has the `avx512fp16` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512fp16")] +/// fn uses_avx512fp16() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512fp16 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avx512fp16 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512fp16" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512fp16 { + const FEATURES: &[&str] = &[ + "avx512fp16", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512fp16 = self] => "avx512fp16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512fp16 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512fp16"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512fp16") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512fp16")] + /// Create a new token for the "avx512fp16" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-FP16 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512fp16" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512fp16) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512fp16 = value] => "avx512fp16", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs new file mode 100644 index 000000000..b7ab646c4 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-IFMA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-IFMA] --- Advanced Vector Extensions 512-bit - Integer Fused Multiply Add +/// +/// [AVX512-IFMA]: https://en.wikipedia.org/wiki/AVX-512#IFMA +/// +/// A token indicating that the current CPU has the `avx512ifma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512ifma")] +/// fn uses_avx512ifma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512ifma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512ifma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512ifma" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512ifma { + const FEATURES: &[&str] = &[ + "avx512ifma", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512ifma = self] => "avx512ifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512ifma { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512ifma"` target feature is detected as enabled. 
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512ifma") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512ifma")] + /// Create a new token for the "avx512ifma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-IFMA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512ifma" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512ifma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512ifma = value] => "avx512ifma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs new file mode 100644 index 000000000..9d0ad4dac --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VBMI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VBMI] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions +/// +/// [AVX512-VBMI]: https://en.wikipedia.org/wiki/AVX-512#BW,_DQ_and_VBMI +/// +/// A token indicating that the current CPU has the `avx512vbmi` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vbmi")] +/// fn uses_avx512vbmi() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vbmi { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vbmi { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vbmi" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vbmi { + const FEATURES: &[&str] = &[ + "avx512vbmi", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vbmi = self] => "avx512vbmi", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vbmi { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vbmi"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vbmi") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vbmi")] + /// Create a new token for the "avx512vbmi" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VBMI is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vbmi" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vbmi) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi = value] => "avx512vbmi", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs new file mode 100644 index 000000000..cfff6b25e --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -0,0 +1,190 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VBMI2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VBMI2] --- Advanced Vector Extensions 512-bit - Vector Byte Manipulation Instructions 2 +/// +/// [AVX512-VBMI2]: https://en.wikipedia.org/wiki/AVX-512#VBMI2 +/// +/// A token indicating that the current CPU has the `avx512vbmi2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vbmi2")] +/// fn uses_avx512vbmi2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vbmi2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Avx512vbmi2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vbmi2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vbmi2 { + const FEATURES: &[&str] = &[ + "avx512vbmi2", + "avx", + "avx2", + "avx512bw", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vbmi2 = self] => "avx512vbmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vbmi2 { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vbmi2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vbmi2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vbmi2")] + /// Create a new token for the "avx512vbmi2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VBMI2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vbmi2" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512bw { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512bw { crate::x86::avx512::Avx512bw::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vbmi2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vbmi2 = value] => "avx512vbmi2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs new file mode 100644 index 000000000..ddfd7a1c2 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -0,0 +1,172 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VL target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VL] --- Advanced Vector Extensions 512-bit - Vector Length Extensions +/// +/// [AVX512-VL]: https://en.wikipedia.org/wiki/AVX-512 +/// +/// A token indicating that the current CPU has the `avx512vl` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vl")] +/// fn uses_avx512vl() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vl { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vl { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vl" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vl { + const FEATURES: &[&str] = &[ + "avx512vl", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", + "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vl = self] => "avx512vl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vl { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vl"` target feature is detected as enabled. 
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vl")] + /// Create a new token for the "avx512vl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VL is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vl" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vl) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vl = value] => "avx512vl", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs new file mode 100644 index 000000000..528282d97 --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VNNI target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VNNI] --- Advanced Vector Extensions 512-bit - Vector Neural Network Instructions +/// +/// [AVX512-VNNI]: https://en.wikipedia.org/wiki/AVX-512#VNNI +/// +/// A token indicating that the current CPU has the `avx512vnni` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vnni")] +/// fn uses_avx512vnni() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vnni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vnni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vnni" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vnni { + const FEATURES: &[&str] = &[ + "avx512vnni", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vnni = self] => "avx512vnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vnni { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vnni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vnni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vnni")] + /// Create a new token for the "avx512vnni" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VNNI is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vnni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vnni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vnni = value] => "avx512vnni", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs new file mode 100644 index 000000000..73344f75e --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VP2INTERSECT target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VP2INTERSECT] --- Advanced Vector Extensions 512-bit - Vector Pair Intersection to a Pair of Mask Registers +/// +/// [AVX512-VP2INTERSECT]: https://en.wikipedia.org/wiki/AVX-512#VP2INTERSECT +/// +/// A token indicating that the current CPU has the `avx512vp2intersect` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vp2intersect")] +/// fn uses_avx512vp2intersect() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vp2intersect { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vp2intersect { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vp2intersect" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vp2intersect { + const FEATURES: &[&str] = &[ + "avx512vp2intersect", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Avx512vp2intersect = self] => "avx512vp2intersect", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vp2intersect { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vp2intersect"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vp2intersect") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vp2intersect")] + /// Create a new token for the "avx512vp2intersect" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VP2INTERSECT is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vp2intersect" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vp2intersect) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vp2intersect = value] => "avx512vp2intersect", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs new file mode 100644 index 000000000..7f96f8a7d --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -0,0 +1,182 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AVX512-VPOPCNTDQ target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AVX512-VPOPCNTDQ] --- Advanced Vector Extensions 512-bit - Vector Population Count Instruction +/// +/// [AVX512-VPOPCNTDQ]:https://en.wikipedia.org/wiki/AVX-512#VPOPCNTDQ_and_BITALG +/// +/// A token indicating that the current CPU has the `avx512vpopcntdq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512vpopcntdq")] +/// fn uses_avx512vpopcntdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Avx512vpopcntdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Avx512vpopcntdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""avx512vpopcntdq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Avx512vpopcntdq { + const FEATURES: &[&str] = &[ + "avx512vpopcntdq", + "avx", + "avx2", + "avx512f", + "f16c", + "fma", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Avx512vpopcntdq = self] => "avx512vpopcntdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Avx512vpopcntdq { + #[cfg(feature = "std")] + /// Create a new token if the `"avx512vpopcntdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("avx512vpopcntdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "avx512vpopcntdq")] + /// Create a new token for the "avx512vpopcntdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AVX512-VPOPCNTDQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512vpopcntdq" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::avx512::Avx512f { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::avx512::Avx512f { crate::x86::avx512::Avx512f::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Avx512vpopcntdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Avx512vpopcntdq = value] => "avx512vpopcntdq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/avx512/mod.rs b/fearless_simd_core/src/x86/avx512/mod.rs new file mode 100644 index 000000000..1044da40c --- /dev/null +++ b/fearless_simd_core/src/x86/avx512/mod.rs @@ -0,0 +1,41 @@ +mod avx512bf16; +pub use avx512bf16::Avx512bf16; + +mod avx512bitalg; +pub use avx512bitalg::Avx512bitalg; + +mod avx512bw; +pub use avx512bw::Avx512bw; + +mod avx512cd; +pub use avx512cd::Avx512cd; + +mod avx512dq; +pub use avx512dq::Avx512dq; + +mod avx512f; +pub use avx512f::Avx512f; + +mod avx512fp16; +pub use avx512fp16::Avx512fp16; + +mod avx512ifma; +pub use avx512ifma::Avx512ifma; + +mod avx512vbmi; +pub use avx512vbmi::Avx512vbmi; + +mod avx512vbmi2; +pub use avx512vbmi2::Avx512vbmi2; + +mod avx512vl; +pub use avx512vl::Avx512vl; + +mod avx512vnni; +pub use avx512vnni::Avx512vnni; + +mod avx512vp2intersect; +pub use avx512vp2intersect::Avx512vp2intersect; + +mod avx512vpopcntdq; +pub use avx512vpopcntdq::Avx512vpopcntdq; diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs new file mode 100644 index 
000000000..af937bef2 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The AES target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [AES] --- Advanced Encryption Standard +/// +/// [AES]: https://en.wikipedia.org/wiki/AES_instruction_set +/// +/// A token indicating that the current CPU has the `aes` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "aes")] +/// fn uses_aes() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Aes { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Aes { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""aes" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Aes { + const FEATURES: &[&str] = &["aes", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Aes = self] => "aes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Aes { + #[cfg(feature = "std")] + /// Create a new token if the `"aes"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
+ #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("aes") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "aes")] + /// Create a new token for the "aes" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// AES is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "aes" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Aes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Aes = value] => "aes", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Aes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Aes = value] => "aes", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs new file mode 100644 index 000000000..63c73e81e --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The GFNI target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [GFNI] --- Galois Field New Instructions +/// +/// [GFNI]: https://en.wikipedia.org/wiki/AVX-512#GFNI +/// +/// A token indicating that the current CPU has the `gfni` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "gfni")] +/// fn uses_gfni() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Gfni { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Gfni { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""gfni" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Gfni { + const FEATURES: &[&str] = &["gfni", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Gfni = self] => "gfni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Gfni { + #[cfg(feature = "std")] + /// Create a new token if the `"gfni"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("gfni") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "gfni")] + /// Create a new token for the "gfni" target feature. 
+ /// + /// This method is useful to get a new token if you have an external proof that + /// GFNI is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "gfni" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Gfni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Gfni = value] => "gfni", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Gfni) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Gfni = value] => "gfni", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs new file mode 100644 index 000000000..5e5d3d4fa --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The KEYLOCKER target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [KEYLOCKER] --- Intel Key Locker Instructions +/// +/// [KEYLOCKER]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions +/// +/// A token indicating that the current CPU has the `kl` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "kl")] +/// fn uses_keylocker() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Keylocker { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Keylocker { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""kl" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Keylocker { + const FEATURES: &[&str] = &["kl"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Keylocker = self] => "kl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Keylocker { + #[cfg(feature = "std")] + /// Create a new token if the `"kl"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("kl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "kl")] + /// Create a new token for the "kl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// KEYLOCKER is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "kl" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/mod.rs b/fearless_simd_core/src/x86/crypto/mod.rs new file mode 100644 index 000000000..cfb34d3b4 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/mod.rs @@ -0,0 +1,38 @@ +mod aes; +pub use aes::Aes; + +mod gfni; +pub use gfni::Gfni; + +mod kl; +pub use kl::Keylocker; + +mod pclmulqdq; +pub use pclmulqdq::Pclmulqdq; + +mod rdrand; +pub use rdrand::Rdrand; + +mod rdseed; +pub use rdseed::Rdseed; + +mod sha; +pub use sha::Sha; + +mod sha512; +pub use sha512::Sha512; + +mod sm3; +pub use sm3::Sm3; + +mod sm4; +pub use sm4::Sm4; + +mod vaes; +pub use vaes::Vaes; + +mod vpclmulqdq; +pub use vpclmulqdq::Vpclmulqdq; + +mod widekl; +pub use widekl::WideKeylocker; diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs new file mode 100644 index 000000000..56d47f7ef --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `pclmulqdq` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["pclmulqdq"] --- Packed carry-less multiplication quadword +/// +/// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq +/// +/// A token indicating that the current CPU has the `pclmulqdq` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "pclmulqdq")] +/// fn uses_pclmulqdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Pclmulqdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Pclmulqdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""pclmulqdq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Pclmulqdq { + const FEATURES: &[&str] = &["pclmulqdq", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Pclmulqdq = self] => "pclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Pclmulqdq { + #[cfg(feature = "std")] + /// Create a new token if the `"pclmulqdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("pclmulqdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "pclmulqdq")] + /// Create a new token for the "pclmulqdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `pclmulqdq` is available. 
This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "pclmulqdq" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Pclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Pclmulqdq = value] => "pclmulqdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Pclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Pclmulqdq = value] => "pclmulqdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs new file mode 100644 index 000000000..9003251da --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `rdrand` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["rdrand"] --- Read random number +/// +/// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand +/// +/// A token indicating that the current CPU has the `rdrand` target feature. 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "rdrand")] +/// fn uses_rdrand() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Rdrand { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Rdrand { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""rdrand" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Rdrand { + const FEATURES: &[&str] = &["rdrand"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Rdrand = self] => "rdrand", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Rdrand { + #[cfg(feature = "std")] + /// Create a new token if the `"rdrand"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("rdrand") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "rdrand")] + /// Create a new token for the "rdrand" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `rdrand` is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "rdrand" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs new file mode 100644 index 000000000..26389a358 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `rdseed` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["rdseed"] --- Read random seed +/// +/// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand +/// +/// A token indicating that the current CPU has the `rdseed` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "rdseed")] +/// fn uses_rdseed() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Rdseed { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Rdseed { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""rdseed" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Rdseed { + const FEATURES: &[&str] = &["rdseed"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Rdseed = self] => "rdseed", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Rdseed { + #[cfg(feature = "std")] + /// Create a new token if the `"rdseed"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("rdseed") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "rdseed")] + /// Create a new token for the "rdseed" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `rdseed` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "rdseed" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs new file mode 100644 index 000000000..8c53b0011 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SHA target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SHA] --- Secure Hash Algorithm +/// +/// [SHA]: https://en.wikipedia.org/wiki/Intel_SHA_extensions +/// +/// A token indicating that the current CPU has the `sha` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sha")] +/// fn uses_sha() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sha { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sha { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sha" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sha { + const FEATURES: &[&str] = &["sha", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Sha = self] => "sha", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sha { + #[cfg(feature = "std")] + /// Create a new token if the `"sha"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sha") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sha")] + /// Create a new token for the "sha" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SHA is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sha" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sha) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha = value] => "sha", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sha) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sha = value] => "sha", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs new file mode 100644 index 000000000..6968d4a45 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SHA512 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SHA512] --- Secure Hash Algorithm with 512-bit digest +/// +/// [SHA512]: https://en.wikipedia.org/wiki/Intel_SHA_extensions +/// +/// A token indicating that the current CPU has the `sha512` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sha512")] +/// fn uses_sha512() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sha512 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sha512 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sha512" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sha512 { + const FEATURES: &[&str] = &[ + "sha512", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Sha512 = self] => "sha512", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sha512 { + #[cfg(feature = "std")] + /// Create a new token if the `"sha512"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sha512") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sha512")] + /// Create a new token for the "sha512" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SHA512 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sha512" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sha512) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sha512 = value] => "sha512", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs new file mode 100644 index 000000000..3292d72d2 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SM3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SM3] --- ShangMi 3 Hash Algorithm +/// +/// [SM3]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions +/// +/// A token indicating that the current CPU has the `sm3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sm3")] +/// fn uses_sm3() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sm3 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sm3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sm3" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sm3 { + const FEATURES: &[&str] = &[ + "sm3", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sm3 = self] => "sm3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sm3 { + #[cfg(feature = "std")] + /// Create a new token if the `"sm3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. 
+ // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sm3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sm3")] + /// Create a new token for the "sm3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SM3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sm3" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sm3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm3 = value] => "sm3", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs new file mode 100644 index 000000000..81e2db9a4 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -0,0 +1,150 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SM4 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SM4] --- ShangMi 4 Cipher Algorithm +/// +/// [SM4]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_SHA_and_SM3_instructions +/// +/// A token indicating that the current CPU has the `sm4` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sm4")] +/// fn uses_sm4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sm4 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sm4 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sm4" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sm4 { + const FEATURES: &[&str] = &[ + "sm4", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sm4 = self] => "sm4", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sm4 { + #[cfg(feature = "std")] + /// Create a new token if the `"sm4"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sm4") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sm4")] + /// Create a new token for the "sm4" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SM4 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sm4" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sm4) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sm4 = value] => "sm4", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs new file mode 100644 index 000000000..fca0a918d --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -0,0 +1,157 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The VAES target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [VAES] --- Vector AES Instructions +/// +/// [VAES]: https://en.wikipedia.org/wiki/AVX-512#VAES +/// +/// A token indicating that the current CPU has the `vaes` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "vaes")] +/// fn uses_vaes() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Vaes { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Vaes { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""vaes" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Vaes { + const FEATURES: &[&str] = &[ + "vaes", "aes", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Vaes = self] => "vaes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Vaes { + #[cfg(feature = "std")] + /// Create a new token if the `"vaes"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("vaes") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "vaes")] + /// Create a new token for the "vaes" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// VAES is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "vaes" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::crypto::Aes { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::crypto::Aes { crate::x86::crypto::Aes::new() }) + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::avx::Avx2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::avx::Avx2 { crate::x86::avx::Avx2::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Vaes) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vaes = value] => "vaes", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs new file mode 100644 index 000000000..d50b93f84 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -0,0 +1,158 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The VPCLMULQDQ target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [VPCLMULQDQ] --- Vector Carry-less multiplication of Quadwords +/// +/// [VPCLMULQDQ]: https://en.wikipedia.org/wiki/AVX-512#VPCLMULQDQ +/// +/// A token indicating that the current CPU has the `vpclmulqdq` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "vpclmulqdq")] +/// fn uses_vpclmulqdq() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Vpclmulqdq { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Vpclmulqdq { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""vpclmulqdq" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Vpclmulqdq { + const FEATURES: &[&str] = &[ + "vpclmulqdq", + "avx", + "pclmulqdq", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Vpclmulqdq = self] => "vpclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Vpclmulqdq { + #[cfg(feature = "std")] + /// Create a new token if the `"vpclmulqdq"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("vpclmulqdq") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "vpclmulqdq")] + /// Create a new token for the "vpclmulqdq" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// VPCLMULQDQ is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "vpclmulqdq" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::crypto::Pclmulqdq { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::crypto::Pclmulqdq { crate::x86::crypto::Pclmulqdq::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Vpclmulqdq) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Vpclmulqdq = value] => "vpclmulqdq", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs new file mode 100644 index 000000000..ff179dff7 --- /dev/null +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -0,0 +1,99 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The WIDE KEYLOCKER target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [KEYLOCKER_WIDE] --- Intel Wide Keylocker Instructions +/// +/// [KEYLOCKER_WIDE]: https://en.wikipedia.org/wiki/List_of_x86_cryptographic_instructions#Intel_Key_Locker_instructions +/// +/// A token indicating that the current CPU has the `widekl` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "widekl")] +/// fn uses_wide_keylocker() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct WideKeylocker { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for WideKeylocker { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""widekl" enabled."#) + } +} + +unsafe impl TargetFeatureToken for WideKeylocker { + const FEATURES: &[&str] = &["widekl", "kl"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([WideKeylocker = self] => "widekl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl WideKeylocker { + #[cfg(feature = "std")] + /// Create a new token if the `"widekl"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("widekl") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "widekl")] + /// Create a new token for the "widekl" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// WIDE KEYLOCKER is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "widekl" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::crypto::Keylocker { + fn from(value: WideKeylocker) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([WideKeylocker = value] => "widekl", fn() -> crate::x86::crypto::Keylocker { crate::x86::crypto::Keylocker::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs new file mode 100644 index 000000000..9afa91dc7 --- /dev/null +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -0,0 +1,3 @@ +// Stable in beta, but not current stable +// mod tbm; +// pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs new file mode 100644 index 000000000..e0d8bb5b9 --- /dev/null +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The TBM target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [TBM] --- Trailing Bit Manipulation +/// +/// [TBM]: https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#TBM_(Trailing_Bit_Manipulation) +/// +/// A token indicating that the current CPU has the `tbm` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "tbm")] +/// fn uses_tbm() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Tbm { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Tbm { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""tbm" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Tbm { + const FEATURES: &[&str] = &["tbm", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Tbm = self] => "tbm", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Tbm { + #[cfg(feature = "std")] + /// Create a new token if the `"tbm"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("tbm") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "tbm")] + /// Create a new token for the "tbm" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// TBM is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "tbm" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 10caa27ad..ea94a80d6 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -12,4 +12,12 @@ //! //! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels +pub mod adx; +pub mod avx; +pub mod avx512; +pub mod crypto; +pub mod discontinued; +pub mod sse; pub mod v1; +pub mod v2; +pub mod v3; diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs new file mode 100644 index 000000000..fbc5493f0 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `fxsave + fxrstor` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State +/// +/// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave, +/// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor, +/// +/// A token indicating that the current CPU has the `fxsr` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr")] +/// fn uses_fxsr() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fxsr { + // We don't use non_exhaustive because we don't want this struct to be constructible. 
+ // in different modules in this crate. + _private: (), +} + +impl Debug for Fxsr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fxsr" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Fxsr { + const FEATURES: &[&str] = &["fxsr"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Fxsr = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fxsr { + #[cfg(feature = "std")] + /// Create a new token if the `"fxsr"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fxsr") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fxsr")] + /// Create a new token for the "fxsr" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `fxsave + fxrstor` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs new file mode 100644 index 000000000..ce9a3aeca --- /dev/null +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -0,0 +1,24 @@ +mod fxsr; +pub use fxsr::Fxsr; + +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod sse; +pub use sse::Sse; + +mod sse2; +pub use sse2::Sse2; + +mod sse3; +pub use sse3::Sse3; + +mod ssse3; +pub use ssse3::SupplementalSse3; + +mod sse4_1; +pub use sse4_1::Sse4_1; + +mod sse4_2; +pub use sse4_2::Sse4_2; diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs new file mode 100644 index 000000000..d3473a332 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE] --- Streaming SIMD Extensions +/// +/// [SSE]: https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions +/// +/// A token indicating that the current CPU has the `sse` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse")] +/// fn uses_sse() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse { + // We don't use non_exhaustive because we don't want this struct to be constructible. 
+ // in different modules in this crate. + _private: (), +} + +impl Debug for Sse { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse { + const FEATURES: &[&str] = &["sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse { + #[cfg(feature = "std")] + /// Create a new token if the `"sse"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse")] + /// Create a new token for the "sse" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs new file mode 100644 index 000000000..137bf28c4 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -0,0 +1,99 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE2] --- Streaming SIMD Extensions 2 +/// +/// [SSE2]: https://en.wikipedia.org/wiki/SSE2 +/// +/// A token indicating that the current CPU has the `sse2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse2")] +/// fn uses_sse2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse2 { + const FEATURES: &[&str] = &["sse2", "sse"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Sse2 = self] => "sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse2 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse2")] + /// Create a new token for the "sse2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse2 = value] => "sse2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs new file mode 100644 index 000000000..8c497ec37 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -0,0 +1,106 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE3] --- Streaming SIMD Extensions 3 +/// +/// [SSE3]: https://en.wikipedia.org/wiki/SSE3 +/// +/// A token indicating that the current CPU has the `sse3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse3")] +/// fn uses_sse3() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse3 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse3" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse3 { + const FEATURES: &[&str] = &["sse3", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sse3 = self] => "sse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse3 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse3")] + /// Create a new token for the "sse3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse3" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse3 = value] => "sse3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse3 = value] => "sse3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs new file mode 100644 index 000000000..02ec84f6a --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -0,0 +1,120 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE4.1 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4.1] --- Streaming SIMD Extensions 4.1 +/// +/// [SSE4.1]: https://en.wikipedia.org/wiki/SSE4#SSE4.1 +/// +/// A token indicating that the current CPU has the `sse4.1` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4.1")] +/// fn uses_sse4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4_1 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4_1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4.1" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse4_1 { + const FEATURES: &[&str] = &["sse4.1", "sse", "sse2", "sse3", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Sse4_1 = self] => "sse4.1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4_1 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4.1"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4.1") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4.1")] + /// Create a new token for the "sse4.1" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4.1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4.1" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sse4_1) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sse4_1 = value] => "sse4.1", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs new file mode 100644 index 000000000..cf18d18eb --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -0,0 +1,127 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE4.2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4.2] --- Streaming SIMD Extensions 4.2 +/// +/// [SSE4.2]: https://en.wikipedia.org/wiki/SSE4#SSE4.2 +/// +/// A token indicating that the current CPU has the `sse4.2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4.2")] +/// fn uses_sse4() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4_2 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4_2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4.2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Sse4_2 { + const FEATURES: &[&str] = &["sse4.2", "sse", "sse2", "sse3", "sse4.1", "ssse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated.
+ trampoline!([Sse4_2 = self] => "sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4_2 { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4.2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4.2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4.2")] + /// Create a new token for the "sse4.2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4.2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4.2" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Sse4_2) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4_2 = value] => "sse4.2", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs new file mode 100644 index 000000000..5ba3e9b54 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -0,0 +1,113 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSSE3 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSSE3] --- Supplemental Streaming SIMD Extensions 3 +/// +/// [SSSE3]: https://en.wikipedia.org/wiki/SSSE3 +/// +/// A token indicating that the current CPU has the `ssse3` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "ssse3")] +/// fn uses_ssse3() { +/// // ...
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct SupplementalSse3 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for SupplementalSse3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""ssse3" enabled."#) + } +} + +unsafe impl TargetFeatureToken for SupplementalSse3 { + const FEATURES: &[&str] = &["ssse3", "sse", "sse2", "sse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([SupplementalSse3 = self] => "ssse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl SupplementalSse3 { + #[cfg(feature = "std")] + /// Create a new token if the `"ssse3"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("ssse3") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "ssse3")] + /// Create a new token for the "ssse3" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSSE3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "ssse3" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: SupplementalSse3) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([SupplementalSse3 = value] => "ssse3", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 0e360b506..140fdd549 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -2,14 +2,11 @@ //! //! This is the baseline for x86-64 support. -mod sse; -pub use sse::Sse; - -mod fxsr; -pub use fxsr::Fxsr; +pub use crate::x86::sse::Fxsr; +pub use crate::x86::sse::Sse; /// A token that the current CPU is on the x86-64-v1 microarchitecture level. 
-// TODO: (This is currently incomplete) +// TODO: (This is currently incomplete) pub struct V1 { pub sse: Sse, pub fxsr: Fxsr, diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs new file mode 100644 index 000000000..b831e3492 --- /dev/null +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `cmpxchg16b` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["cmpxchg16b"] --- Compare and exchange 16 bytes (128 bits) of data atomically +/// +/// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b +/// +/// A token indicating that the current CPU has the `cmpxchg16b` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "cmpxchg16b")] +/// fn uses_cmpxchg16b() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Cmpxchg16b { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Cmpxchg16b { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""cmpxchg16b" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Cmpxchg16b { + const FEATURES: &[&str] = &["cmpxchg16b"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Cmpxchg16b = self] => "cmpxchg16b", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Cmpxchg16b { + #[cfg(feature = "std")] + /// Create a new token if the `"cmpxchg16b"` target feature is detected as enabled.
+ /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("cmpxchg16b") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "cmpxchg16b")] + /// Create a new token for the "cmpxchg16b" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `cmpxchg16b` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "cmpxchg16b" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs new file mode 100644 index 000000000..197738fea --- /dev/null +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -0,0 +1,5 @@ +mod cmpxchg16b; +pub use cmpxchg16b::Cmpxchg16b; + +mod popcnt; +pub use popcnt::Popcnt; diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs new file mode 100644 index 000000000..5e78adcef --- /dev/null +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `popcnt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["popcnt"] --- Count of bits set to 1 +/// +/// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt +/// +/// A token indicating that the current CPU has the `popcnt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "popcnt")] +/// fn uses_popcnt() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Popcnt { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Popcnt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""popcnt" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Popcnt { + const FEATURES: &[&str] = &["popcnt"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Popcnt = self] => "popcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Popcnt { + #[cfg(feature = "std")] + /// Create a new token if the `"popcnt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("popcnt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "popcnt")] + /// Create a new token for the "popcnt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `popcnt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "popcnt" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs new file mode 100644 index 000000000..a8cf99d76 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The BMI1 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [BMI1] --- Bit Manipulation Instruction Sets +/// +/// [BMI1]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets +/// +/// A token indicating that the current CPU has the `bmi1` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "bmi1")] +/// fn uses_bmi1() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Bmi1 { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Bmi1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""bmi1" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Bmi1 { + const FEATURES: &[&str] = &["bmi1"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Bmi1 = self] => "bmi1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Bmi1 { + #[cfg(feature = "std")] + /// Create a new token if the `"bmi1"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("bmi1") { + // Safety: The required CPU feature was detected.
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "bmi1")] + /// Create a new token for the "bmi1" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// BMI1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "bmi1" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs new file mode 100644 index 000000000..82502ff8d --- /dev/null +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The BMI2 target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [BMI2] --- Bit Manipulation Instruction Sets 2 +/// +/// [BMI2]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2 +/// +/// A token indicating that the current CPU has the `bmi2` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "bmi2")] +/// fn uses_bmi2() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Bmi2 { + // We don't use non_exhaustive because we don't want this struct to be constructible.
+ // in different modules in this crate. + _private: (), +} + +impl Debug for Bmi2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""bmi2" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Bmi2 { + const FEATURES: &[&str] = &["bmi2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Bmi2 = self] => "bmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Bmi2 { + #[cfg(feature = "std")] + /// Create a new token if the `"bmi2"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("bmi2") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "bmi2")] + /// Create a new token for the "bmi2" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// BMI2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "bmi2" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs new file mode 100644 index 000000000..a1bc72683 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The F16C target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [F16C] --- 16-bit floating point conversion instructions +/// +/// [F16C]: https://en.wikipedia.org/wiki/F16C +/// +/// A token indicating that the current CPU has the `f16c` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "f16c")] +/// fn uses_f16c() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct F16c { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for F16c { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""f16c" enabled."#) + } +} + +unsafe impl TargetFeatureToken for F16c { + const FEATURES: &[&str] = &[ + "f16c", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([F16c = self] => "f16c", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl F16c { + #[cfg(feature = "std")] + /// Create a new token if the `"f16c"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("f16c") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "f16c")] + /// Create a new token for the "f16c" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// F16C is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "f16c" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: F16c) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([F16c = value] => "f16c", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs new file mode 100644 index 000000000..dc232d73e --- /dev/null +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -0,0 +1,143 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The FMA3 target feature. 
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [FMA3] --- Three-operand fused multiply-add +/// +/// [FMA3]: https://en.wikipedia.org/wiki/FMA_instruction_set +/// +/// A token indicating that the current CPU has the `fma` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fma")] +/// fn uses_fma() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Fma { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Fma { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""fma" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Fma { + const FEATURES: &[&str] = &[ + "fma", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", + ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Fma = self] => "fma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Fma { + #[cfg(feature = "std")] + /// Create a new token if the `"fma"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("fma") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "fma")] + /// Create a new token for the "fma" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// FMA3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fma" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::avx::Avx { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::avx::Avx { crate::x86::avx::Avx::new() }) + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +impl From for crate::x86::sse::Sse4_1 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. 
+ trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse4_1 { crate::x86::sse::Sse4_1::new() }) + } +} + +impl From for crate::x86::sse::Sse4_2 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::Sse4_2 { crate::x86::sse::Sse4_2::new() }) + } +} + +impl From for crate::x86::sse::SupplementalSse3 { + fn from(value: Fma) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Fma = value] => "fma", fn() -> crate::x86::sse::SupplementalSse3 { crate::x86::sse::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs new file mode 100644 index 000000000..01ce658db --- /dev/null +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `lzcnt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["lzcnt"] --- Leading zeros count +/// +/// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt +/// +/// A token indicating that the current CPU has the `lzcnt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "lzcnt")] +/// fn uses_lzcnt() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Lzcnt { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Lzcnt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""lzcnt" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Lzcnt { + const FEATURES: &[&str] = &["lzcnt"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Lzcnt = self] => "lzcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Lzcnt { + #[cfg(feature = "std")] + /// Create a new token if the `"lzcnt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("lzcnt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "lzcnt")] + /// Create a new token for the "lzcnt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `lzcnt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "lzcnt" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs new file mode 100644 index 000000000..16d577e30 --- /dev/null +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -0,0 +1,17 @@ +mod bmi1; +pub use bmi1::Bmi1; + +mod bmi2; +pub use bmi2::Bmi2; + +mod f16c; +pub use f16c::F16c; + +mod fma; +pub use fma::Fma; + +mod lzcnt; +pub use lzcnt::Lzcnt; + +mod movbe; +pub use movbe::Movbe; diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs new file mode 100644 index 000000000..6e542dc1e --- /dev/null +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -0,0 +1,92 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `movbe` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["movbe"] --- Move data after swapping bytes +/// +/// ["movbe"]: https://www.felixcloutier.com/x86/movbe +/// +/// A token indicating that the current CPU has the `movbe` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "movbe")] +/// fn uses_movbe() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Movbe { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. 
+ _private: (), +} + +impl Debug for Movbe { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""movbe" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Movbe { + const FEATURES: &[&str] = &["movbe"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Movbe = self] => "movbe", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Movbe { + #[cfg(feature = "std")] + /// Create a new token if the `"movbe"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("movbe") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "movbe")] + /// Create a new token for the "movbe" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `movbe` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "movbe" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs new file mode 100644 index 000000000..9d2228673 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsave` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [`xsave`] --- Save processor extended states +/// +/// ["xsave"]: https://www.felixcloutier.com/x86/xsave +/// +/// A token indicating that the current CPU has the `xsave` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsave")] +/// fn uses_xsave() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsave { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsave { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsave" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsave { + const FEATURES: &[&str] = &["xsave", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Xsave = self] => "xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsave { + #[cfg(feature = "std")] + /// Create a new token if the `"xsave"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsave") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsave")] + /// Create a new token for the "xsave" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsave` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsave" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs new file mode 100644 index 000000000..6d11df470 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsavec` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["xsavec"] --- Save processor extended states with compaction +/// +/// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec +/// +/// A token indicating that the current CPU has the `xsavec` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsavec")] +/// fn uses_xsavec() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsavec { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsavec { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsavec" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsavec { + const FEATURES: &[&str] = &["xsavec", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Xsavec = self] => "xsavec", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsavec { + #[cfg(feature = "std")] + /// Create a new token if the `"xsavec"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsavec") { + // Safety: The required CPU feature was detected. 
+ unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsavec")] + /// Create a new token for the "xsavec" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsavec` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsavec" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs new file mode 100644 index 000000000..b63d444be --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsaveopt` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["xsaveopt"] --- Save processor extended states optimized +/// +/// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt +/// +/// A token indicating that the current CPU has the `xsaveopt` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsaveopt")] +/// fn uses_xsaveopt() { +/// // ... 
+/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsaveopt { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsaveopt { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsaveopt" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsaveopt { + const FEATURES: &[&str] = &["xsaveopt", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. + trampoline!([Xsaveopt = self] => "xsaveopt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsaveopt { + #[cfg(feature = "std")] + /// Create a new token if the `"xsaveopt"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsaveopt") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsaveopt")] + /// Create a new token for the "xsaveopt" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsaveopt` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsaveopt" target feature is available. 
+ /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs new file mode 100644 index 000000000..60633befd --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -0,0 +1,93 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The `xsaves` target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// ["xsaves"] --- Save processor extended states supervisor +/// +/// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves +/// +/// A token indicating that the current CPU has the `xsaves` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "xsaves")] +/// fn uses_xsaves() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Xsaves { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Xsaves { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""xsaves" enabled."#) + } +} + +unsafe impl TargetFeatureToken for Xsaves { + const FEATURES: &[&str] = &["xsaves", ]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we want this constant to be eagerly evaluated. 
+ trampoline!([Xsaves = self] => "xsaves", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Xsaves { + #[cfg(feature = "std")] + /// Create a new token if the `"xsaves"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. + // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("xsaves") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "xsaves")] + /// Create a new token for the "xsaves" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// `xsaves` is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "xsaves" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; From 75153cdad0063bd5e9b9cd60a2cbcb10a519784e Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:51:26 +0100 Subject: [PATCH 06/19] Fixup some docs --- fearless_simd_core/src/lib.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index e212b2406..75bc9acb2 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -35,7 +35,7 @@ extern crate std; /// To construct a value of a type implementing this trait, you must have proven that each /// target feature in `FEATURES` is available. pub unsafe trait TargetFeatureToken: Copy { - /// The set of target features which are enabled for this run, if + /// The set of target features which the current CPU has, if /// you have a value of this type. const FEATURES: &[&str]; @@ -78,7 +78,7 @@ pub unsafe trait TargetFeatureToken: Copy { /// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is: /// /// ```rust,ignore -/// trampoline!([Self = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) +/// trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) /// ``` /// /// There is also support for where clauses after the return type. @@ -178,12 +178,15 @@ macro_rules! trampoline { // We validate that we actually have a token of each claimed type. let _: $token_type = $token; )+ - const { + // We use a const item rather than a const block to ensure that this check is eagerly evaluated. + // This does mean that you can no longer use tokens "generically", but it's hard to think of + // cases where that would be usable anyway. + const _: () = { // And that the claimed types justify enabling the enabled target features. $crate::trampoline::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) // TODO: Better failure message here (i.e.
at least concatting the set of requested features) .unwrap(); - } + }; $( // Soundness: We use `arg_value` outside of the macro body to ensure it doesn't From 119e9651e375fef9ee4d459af4b79294e3334bcc Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 15 Oct 2025 12:23:23 +0100 Subject: [PATCH 07/19] Save some generator changes, including adding x86_v{1,2,3,4} --- fearless_simd_core/gen/src/data.rs | 4 +- fearless_simd_core/gen/src/data/x86.rs | 28 ++- fearless_simd_core/gen/src/main.rs | 193 +++++++++++++++++- fearless_simd_core/gen/templates/x86.rs | 5 +- fearless_simd_core/gen/templates/x86_level.rs | 91 +++++++++ 5 files changed, 313 insertions(+), 8 deletions(-) create mode 100644 fearless_simd_core/gen/templates/x86_level.rs diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs index 87b9ec467..fe46d862b 100644 --- a/fearless_simd_core/gen/src/data.rs +++ b/fearless_simd_core/gen/src/data.rs @@ -1,2 +1,4 @@ mod x86; -pub(crate) use x86::{X86_FEATURES, X86_TEMPLATE}; +pub(crate) use x86::{ + X86_FEATURES, X86_LEVEL_TEMPLATE, X86_TEMPLATE, X86_V1, X86_V2, X86_V3, X86_V4, +}; diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 52c13c4df..e8c5f85c1 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -422,5 +422,29 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ ), ]; -#[test] -fn all_features_included() {} +// All taken from + +pub(crate) const X86_LEVEL_TEMPLATE: &str = include_str!("../../templates/x86_level.rs"); + +/// The target features required in the x86-64-v1 level. +// Rust doesn't have target features for "cmov", "cmpxchg8b", "fpu", "sce", and "mmx". +// The first four are all assumed, and the final is not implemented because +// it's practically impossible to use correctly (and there's no reason to). 
+pub(crate) const X86_V1: &[&str] = &["fxsr", "sse", "sse2"]; +/// The target features required in the x86-64-v1 level, in addition to those already in [`V1`]. +pub(crate) const X86_V2: &[&str] = &[ + "sse3", + "ssse3", + "sse4.1", + "sse4.2", + "popcnt", + "cmpxchg16b", + // The lahfahf target feature is currently in Rust beta. + // "lahfsahf", +]; +/// The target features required in the x86-64-v3 level, excluding those already in [`V2`]. +pub(crate) const X86_V3: &[&str] = &[ + "avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave", +]; +/// The target features required in the x86-64-v4 level, excluding those already in [`V3`]. +pub(crate) const X86_V4: &[&str] = &["avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"]; diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index aec7057f4..fe075fdfe 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -1,7 +1,9 @@ mod data; +use std::collections::HashSet; use std::fmt::Write; use std::fs; +use std::hash::RandomState; use std::{ cell::RefCell, collections::HashMap, @@ -10,21 +12,34 @@ use std::{ path::{Path, PathBuf}, }; +use crate::data::X86_LEVEL_TEMPLATE; + fn main() { let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let src_dir = manifest_dir.ancestors().nth(1).unwrap().join("src"); - generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, data::X86_FEATURES).unwrap(); + { + let x86_features = normalize_features(data::X86_FEATURES); + generate_for_arch(&src_dir, "x86", data::X86_TEMPLATE, &x86_features).unwrap(); + let mut features: Vec<&'static str> = Vec::new(); + features.extend(data::X86_V1); + generate_x86_level(&src_dir, "v1", &x86_features, &features).unwrap(); + features.extend(data::X86_V2); + generate_x86_level(&src_dir, "v2", &x86_features, &features).unwrap(); + features.extend(data::X86_V3); + generate_x86_level(&src_dir, "v3", &x86_features, &features).unwrap(); + features.extend(data::X86_V4); 
+ generate_x86_level(&src_dir, "v4", &x86_features, &features).unwrap(); + } } fn generate_for_arch( root_dir: &Path, arch_module_name: &str, template: &str, - features: &'static [Feature], + features: &[NormalizedFeature], ) -> io::Result<()> { let arch_dir = root_dir.join(arch_module_name); - let features = normalize_features(features); - for feature in &features { + for feature in features { let mut new_docs = String::new(); for line in feature.feature.extra_docs.lines() { writeln!(&mut new_docs, "///{line}").unwrap(); @@ -79,6 +94,7 @@ impl From for {type_path} {{ r#""{ENABLED_FEATURES_STR_LIST}""#, &enabled_feature_str_list, ); + let module_dir = arch_dir.join(feature.feature.module); create_dir_all(&module_dir)?; let mut file = module_dir.join(feature.feature.feature_name.replace(".", "_")); @@ -88,6 +104,175 @@ impl From for {type_path} {{ Ok(()) } +/// Generate the code for an X86 microarchitecture level. +fn generate_x86_level( + root_dir: &Path, + level: &'static str, + all_features: &[NormalizedFeature], + required_features: &[&'static str], +) -> io::Result<()> { + // Precalculate the sets of features we need to support. + // Intermediate value for + let mut superset = HashSet::new(); + for feature in required_features { + superset.insert(*feature); + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + superset.extend(&normalized.children); + } + + // Every single target feature supported on this level, including those implied. + // (In all likelihood, this is the same as `required_features`, but I'd rather validate that manually) + let mut superset = superset.into_iter().collect::>(); + superset.sort(); + let mut lcd = HashSet::<_, RandomState>::from_iter(superset.iter().copied()); + // We make the assumption that features are a tree, that is, there's no case where `A->B` and `B->A`. + // However, even if that didn't hold, we at least use a consistent ordering here. 
+ // We test from the superset to be safe; this should be equivalent to using `required_features`, though. + for feature in &superset { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + for feature in &normalized.children { + // If the feature is a child of another required feature, we know we don't need it for this version. + // We don't care whether or not it was actually removed. + lcd.remove(*feature); + } + } + // The set of features which are strictly required. + // This is used to create the target feature string, so that it can be as short as possible. + let mut lcd = lcd.into_iter().collect::>(); + lcd.sort(); + // Now that we have lcd and superset, we can preprocess what we need for the actual file. + + let level_struct_name = level.to_uppercase(); + // The target_feature(enable = "...") string. + let lcd_contents = lcd.join(", "); + // The fields of the new struct. + let lcd_field_definitions = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + format!("{feature}: {type_path},\n") + }) + .collect::(); + // The enabled FEATURES. + let superset_list = superset + .iter() + .map(|it| format!(r#""{it}""#)) + .collect::>() + .join(", "); + // First argument to `trampoline!` + let lcd_trampoline = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + format!("{type_path} = self.{feature}") + }) + .collect::>() + .join(","); + // The version of the struct initializer in `try_new`. 
+ let struct_initializer_try_new = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + // We rely on rustfmt to get the tab spacing right. + format!("\t{feature}: {type_path}::try_new()?,\n") + }) + .collect::(); + // The version of the struct initializer in `new`. + let struct_initializer_new = lcd + .iter() + .map(|feature| { + let normalized = all_features + .iter() + .find(|it| it.feature.feature_name == *feature) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + normalized.feature.module, normalized.feature.struct_name + ); + format!("\t{feature}: {type_path}::new(),\n") + }) + .collect::(); + + let mut from_impls = String::new(); + for child in &superset { + let from_feature = all_features + .iter() + .find(|it| it.feature.feature_name == *child) + .unwrap(); + let type_path = format!( + "crate::x86::{}::{}", + from_feature.feature.module, from_feature.feature.struct_name + ); + write!( + from_impls, + "\n\ +impl From for {type_path} {{ + fn from(value: LEVEL_STRUCT_NAME) -> Self {{ + // This serves as a correctness check of the implicitly enabled features. + trampoline!([LEVEL_STRUCT_NAME = value] => \"{{LEVEL_FEATURE_LCD_CONTENTS}}\", fn() -> {type_path} {{ {type_path}::new() }}) + }} +}}\n" + ).unwrap(); + } + + let mut result = format!( + "// This file is automatically generated by `fearless_simd_core_gen`.\n\ + // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ + {X86_LEVEL_TEMPLATE}" + ); + // We replace the from impls first, as they use template variables from the rest of this. 
+ result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace( + "/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/", + &lcd_field_definitions, + ); + result = result.replace(r#""{LEVEL_FEATURE_SUPERSET_LIST}""#, &superset_list); + result = result.replace("{LEVEL_FEATURE_LCD_TRAMPOLINE}", &lcd_trampoline); + + result = result.replace( + "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/", + &struct_initializer_try_new, + ); + result = result.replace( + "/*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/", + &struct_initializer_new, + ); + + let arch_dir = root_dir.join("x86"); + let module_dir = arch_dir.join(level); + create_dir_all(&module_dir)?; + let output_path = module_dir.join("level.rs"); + fs::write(output_path, result)?; + Ok(()) +} + #[derive(Debug)] struct Feature { /// The name of the struct to be generated. diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 7742b8e98..4c170fc8d 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -32,12 +32,15 @@ impl Debug for FEATURE_STRUCT_NAME { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for FEATURE_STRUCT_NAME { const FEATURES: &[&str] = &["{ENABLED_FEATURES_STR_LIST}"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([FEATURE_STRUCT_NAME = self] => "{FEATURE_ID}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs new file mode 100644 index 000000000..be72bf89d --- /dev/null +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -0,0 +1,91 @@ +//! The x86-64-{LEVEL_ID} microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-{LEVEL_ID} microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")] +/// fn uses_x86_64_{LEVEL_ID}() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct LEVEL_STRUCT_NAME { + /*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/ + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for LEVEL_STRUCT_NAME { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-{LEVEL_ID} enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. 
+unsafe impl TargetFeatureToken for LEVEL_STRUCT_NAME { + const FEATURES: &[&str] = &["{LEVEL_FEATURE_SUPERSET_LIST}"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly mapes to the target feature string. + trampoline!([{LEVEL_FEATURE_LCD_TRAMPOLINE}] => "{LEVEL_FEATURE_LCD_CONTENTS}", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl LEVEL_STRUCT_NAME { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-{LEVEL_ID} target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_TRY_NEW}*/ + }) + } + + #[target_feature(enable = "{LEVEL_FEATURE_LCD_CONTENTS}")] + /// Create a new token for the x86-64-{LEVEL_ID} microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-{LEVEL_ID} is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "{LEVEL_FEATURE_LCD_CONTENTS}" target feature is available. + pub fn new() -> Self { + Self { + /*{LEVEL_FEATURE_STRUCT_INITIALIZER_LCD_NEW}*/ + } + } +} +/*{FROM_IMPLS}*/ + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; From b184befc9d8361fea2785bd2cc046714382befcc Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 15 Oct 2025 12:49:28 +0100 Subject: [PATCH 08/19] Fixup the generator and mostly finalize levels --- fearless_simd_core/gen/src/main.rs | 42 +-- fearless_simd_core/gen/templates/x86_level.rs | 2 + fearless_simd_core/src/x86/adx/adx.rs | 5 +- fearless_simd_core/src/x86/avx/avx.rs | 5 +- fearless_simd_core/src/x86/avx/avx2.rs | 5 +- fearless_simd_core/src/x86/avx/avxifma.rs | 5 +- .../src/x86/avx/avxneconvert.rs | 5 +- fearless_simd_core/src/x86/avx/avxvnni.rs | 5 +- .../src/x86/avx/avxvnniint16.rs | 5 +- fearless_simd_core/src/x86/avx/avxvnniint8.rs | 5 +- .../src/x86/avx512/avx512bf16.rs | 5 +- .../src/x86/avx512/avx512bitalg.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512bw.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512cd.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512dq.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512f.rs | 5 +- .../src/x86/avx512/avx512fp16.rs | 5 +- .../src/x86/avx512/avx512ifma.rs | 5 +- .../src/x86/avx512/avx512vbmi.rs | 5 +- .../src/x86/avx512/avx512vbmi2.rs | 5 +- fearless_simd_core/src/x86/avx512/avx512vl.rs | 5 +- .../src/x86/avx512/avx512vnni.rs | 5 +- .../src/x86/avx512/avx512vp2intersect.rs | 5 +- .../src/x86/avx512/avx512vpopcntdq.rs | 5 +- fearless_simd_core/src/x86/crypto/aes.rs | 5 +- fearless_simd_core/src/x86/crypto/gfni.rs | 5 +- fearless_simd_core/src/x86/crypto/kl.rs | 5 +- .../src/x86/crypto/pclmulqdq.rs | 5 +- fearless_simd_core/src/x86/crypto/rdrand.rs | 5 +- fearless_simd_core/src/x86/crypto/rdseed.rs | 5 +- fearless_simd_core/src/x86/crypto/sha.rs | 5 +- fearless_simd_core/src/x86/crypto/sha512.rs | 5 +- fearless_simd_core/src/x86/crypto/sm3.rs | 5 +- fearless_simd_core/src/x86/crypto/sm4.rs | 5 +- fearless_simd_core/src/x86/crypto/vaes.rs | 5 +- .../src/x86/crypto/vpclmulqdq.rs | 5 +- fearless_simd_core/src/x86/crypto/widekl.rs | 5 +- 
.../src/x86/discontinued/tbm.rs | 5 +- fearless_simd_core/src/x86/mod.rs | 3 + fearless_simd_core/src/x86/sse/fxsr.rs | 5 +- fearless_simd_core/src/x86/sse/sse.rs | 5 +- fearless_simd_core/src/x86/sse/sse2.rs | 5 +- fearless_simd_core/src/x86/sse/sse3.rs | 5 +- fearless_simd_core/src/x86/sse/sse4_1.rs | 5 +- fearless_simd_core/src/x86/sse/sse4_2.rs | 5 +- fearless_simd_core/src/x86/sse/ssse3.rs | 5 +- fearless_simd_core/src/x86/v1/level.rs | 119 +++++++ fearless_simd_core/src/x86/v1/mod.rs | 31 +- fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 5 +- fearless_simd_core/src/x86/v2/level.rs | 179 ++++++++++ fearless_simd_core/src/x86/v2/mod.rs | 12 + fearless_simd_core/src/x86/v2/popcnt.rs | 5 +- fearless_simd_core/src/x86/v3/bmi1.rs | 5 +- fearless_simd_core/src/x86/v3/bmi2.rs | 5 +- fearless_simd_core/src/x86/v3/f16c.rs | 5 +- fearless_simd_core/src/x86/v3/fma.rs | 5 +- fearless_simd_core/src/x86/v3/level.rs | 279 +++++++++++++++ fearless_simd_core/src/x86/v3/lzcnt.rs | 5 +- fearless_simd_core/src/x86/v3/mod.rs | 17 + fearless_simd_core/src/x86/v3/movbe.rs | 5 +- fearless_simd_core/src/x86/v4/level.rs | 325 ++++++++++++++++++ fearless_simd_core/src/x86/v4/mod.rs | 27 ++ fearless_simd_core/src/x86/xsave/mod.rs | 15 + fearless_simd_core/src/x86/xsave/xsave.rs | 5 +- fearless_simd_core/src/x86/xsave/xsavec.rs | 5 +- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 5 +- fearless_simd_core/src/x86/xsave/xsaves.rs | 5 +- 67 files changed, 1220 insertions(+), 106 deletions(-) create mode 100644 fearless_simd_core/src/x86/v1/level.rs create mode 100644 fearless_simd_core/src/x86/v2/level.rs create mode 100644 fearless_simd_core/src/x86/v3/level.rs create mode 100644 fearless_simd_core/src/x86/v4/level.rs create mode 100644 fearless_simd_core/src/x86/v4/mod.rs create mode 100644 fearless_simd_core/src/x86/xsave/mod.rs diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index fe075fdfe..c3ef8b9e2 100644 --- a/fearless_simd_core/gen/src/main.rs +++ 
b/fearless_simd_core/gen/src/main.rs @@ -150,7 +150,7 @@ fn generate_x86_level( let level_struct_name = level.to_uppercase(); // The target_feature(enable = "...") string. - let lcd_contents = lcd.join(", "); + let lcd_contents = lcd.join(","); // The fields of the new struct. let lcd_field_definitions = lcd .iter() @@ -159,11 +159,13 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); - format!("{feature}: {type_path},\n") + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); + format!( + "/// The contained proof that {} is available.\n\ + pub {feature}: {type_path},\n", + normalized.feature.feature_docs_name + ) }) .collect::(); // The enabled FEATURES. @@ -180,14 +182,12 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); format!("{type_path} = self.{feature}") }) .collect::>() - .join(","); + .join(", "); // The version of the struct initializer in `try_new`. let struct_initializer_try_new = lcd .iter() @@ -196,10 +196,8 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); // We rely on rustfmt to get the tab spacing right. 
format!("\t{feature}: {type_path}::try_new()?,\n") }) @@ -212,10 +210,8 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *feature) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - normalized.feature.module, normalized.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", normalized.feature.struct_name); + let feature = feature.replace(".", "_"); format!("\t{feature}: {type_path}::new(),\n") }) .collect::(); @@ -226,10 +222,7 @@ fn generate_x86_level( .iter() .find(|it| it.feature.feature_name == *child) .unwrap(); - let type_path = format!( - "crate::x86::{}::{}", - from_feature.feature.module, from_feature.feature.struct_name - ); + let type_path = format!("crate::x86::{level}::{}", from_feature.feature.struct_name); write!( from_impls, "\n\ @@ -249,6 +242,9 @@ impl From for {type_path} {{ ); // We replace the from impls first, as they use template variables from the rest of this. result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("LEVEL_STRUCT_NAME", &level_struct_name); + result = result.replace("{LEVEL_ID}", level); + result = result.replace("{LEVEL_FEATURE_LCD_CONTENTS}", &lcd_contents); result = result.replace( "/*{LEVEL_FEATURE_LCD_FIELD_DEFINITIONS}*/", &lcd_field_definitions, diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs index be72bf89d..c02124118 100644 --- a/fearless_simd_core/gen/templates/x86_level.rs +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -81,6 +81,8 @@ impl LEVEL_STRUCT_NAME { } } } +// TODO: From impls to convert into lower x86 versions. 
+ /*{FROM_IMPLS}*/ const _: () = { diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs index b10b63790..10e7b599e 100644 --- a/fearless_simd_core/src/x86/adx/adx.rs +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -37,12 +37,15 @@ impl Debug for Adx { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Adx { const FEATURES: &[&str] = &["adx"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Adx = self] => "adx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs index baa56e369..368047220 100644 --- a/fearless_simd_core/src/x86/avx/avx.rs +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -37,12 +37,15 @@ impl Debug for Avx { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx { const FEATURES: &[&str] = &["avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avx = self] => "avx", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs index b6f252bf5..d3fec9f9c 100644 --- a/fearless_simd_core/src/x86/avx/avx2.rs +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -37,6 +37,8 @@ impl Debug for Avx2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx2 { const FEATURES: &[&str] = &[ "avx2", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avx2 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx2 = self] => "avx2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs index 12fda758a..c6e1964da 100644 --- a/fearless_simd_core/src/x86/avx/avxifma.rs +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -37,6 +37,8 @@ impl Debug for Avxifma { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxifma { const FEATURES: &[&str] = &[ "avxifma", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avxifma { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avxifma = self] => "avxifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs index a2adef3c8..9f94fc893 100644 --- a/fearless_simd_core/src/x86/avx/avxneconvert.rs +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -37,6 +37,8 @@ impl Debug for Avxneconvert { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxneconvert { const FEATURES: &[&str] = &[ "avxneconvert", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Avxneconvert { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avxneconvert = self] => "avxneconvert", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs index 48148a0c0..5e10181ea 100644 --- a/fearless_simd_core/src/x86/avx/avxvnni.rs +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -37,6 +37,8 @@ impl Debug for Avxvnni { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxvnni { const FEATURES: &[&str] = &[ "avxvnni", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avxvnni { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avxvnni = self] => "avxvnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs index 36b16a412..dab234603 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint16.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -37,6 +37,8 @@ impl Debug for Avxvnniint16 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxvnniint16 { const FEATURES: &[&str] = &[ "avxvnniint16", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Avxvnniint16 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avxvnniint16 = self] => "avxvnniint16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs index 2a0eaf4a4..ff8876602 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint8.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -37,6 +37,8 @@ impl Debug for Avxvnniint8 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avxvnniint8 { const FEATURES: &[&str] = &[ "avxvnniint8", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Avxvnniint8 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avxvnniint8 = self] => "avxvnniint8", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs index 523969729..705252a5a 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bf16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -37,6 +37,8 @@ impl Debug for Avx512bf16 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512bf16 { const FEATURES: &[&str] = &[ "avx512bf16", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512bf16 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512bf16 = self] => "avx512bf16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs index 66bb543ad..578fd8837 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -38,6 +38,8 @@ impl Debug for Avx512bitalg { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512bitalg { const FEATURES: &[&str] = &[ "avx512bitalg", @@ -57,7 +59,8 @@ unsafe impl TargetFeatureToken for Avx512bitalg { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Avx512bitalg = self] => "avx512bitalg", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs index 7213b3da7..29b058298 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bw.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -37,6 +37,8 @@ impl Debug for Avx512bw { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512bw { const FEATURES: &[&str] = &[ "avx512bw", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512bw { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512bw = self] => "avx512bw", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs index b3b9c8c36..e7ed2389b 100644 --- a/fearless_simd_core/src/x86/avx512/avx512cd.rs +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -37,6 +37,8 @@ impl Debug for Avx512cd { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512cd { const FEATURES: &[&str] = &[ "avx512cd", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512cd { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512cd = self] => "avx512cd", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs index fb6d36703..92b8c87b8 100644 --- a/fearless_simd_core/src/x86/avx512/avx512dq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -37,6 +37,8 @@ impl Debug for Avx512dq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512dq { const FEATURES: &[&str] = &[ "avx512dq", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512dq { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512dq = self] => "avx512dq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs index fa6adb779..bad51a083 100644 --- a/fearless_simd_core/src/x86/avx512/avx512f.rs +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -37,6 +37,8 @@ impl Debug for Avx512f { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512f { const FEATURES: &[&str] = &[ "avx512f", "avx", "avx2", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Avx512f { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512f = self] => "avx512f", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs index f3ed60899..b3ba12d53 100644 --- a/fearless_simd_core/src/x86/avx512/avx512fp16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -37,6 +37,8 @@ impl Debug for Avx512fp16 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512fp16 { const FEATURES: &[&str] = &[ "avx512fp16", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512fp16 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512fp16 = self] => "avx512fp16", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs index b7ab646c4..330e16a8e 100644 --- a/fearless_simd_core/src/x86/avx512/avx512ifma.rs +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -37,6 +37,8 @@ impl Debug for Avx512ifma { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512ifma { const FEATURES: &[&str] = &[ "avx512ifma", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512ifma { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512ifma = self] => "avx512ifma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs index 9d0ad4dac..2811eb146 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vbmi { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vbmi { const FEATURES: &[&str] = &[ "avx512vbmi", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512vbmi { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vbmi = self] => "avx512vbmi", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs index cfff6b25e..aa209c2d2 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vbmi2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vbmi2 { const FEATURES: &[&str] = &[ "avx512vbmi2", @@ -56,7 +58,8 @@ unsafe impl TargetFeatureToken for Avx512vbmi2 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vbmi2 = self] => "avx512vbmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs index ddfd7a1c2..4089a4df4 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vl.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vl { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vl { const FEATURES: &[&str] = &[ "avx512vl", "avx", "avx2", "avx512f", "f16c", "fma", "sse", "sse2", "sse3", "sse4.1", @@ -45,7 +47,8 @@ unsafe impl TargetFeatureToken for Avx512vl { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vl = self] => "avx512vl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs index 528282d97..9703f9b64 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vnni.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vnni { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vnni { const FEATURES: &[&str] = &[ "avx512vnni", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512vnni { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vnni = self] => "avx512vnni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs index 73344f75e..6a3bdd177 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vp2intersect { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Avx512vp2intersect { const FEATURES: &[&str] = &[ "avx512vp2intersect", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512vp2intersect { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vp2intersect = self] => "avx512vp2intersect", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs index 7f96f8a7d..ada57947c 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -37,6 +37,8 @@ impl Debug for Avx512vpopcntdq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
unsafe impl TargetFeatureToken for Avx512vpopcntdq { const FEATURES: &[&str] = &[ "avx512vpopcntdq", @@ -55,7 +57,8 @@ unsafe impl TargetFeatureToken for Avx512vpopcntdq { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Avx512vpopcntdq = self] => "avx512vpopcntdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs index af937bef2..afe9a2eed 100644 --- a/fearless_simd_core/src/x86/crypto/aes.rs +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -37,12 +37,15 @@ impl Debug for Aes { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Aes { const FEATURES: &[&str] = &["aes", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Aes = self] => "aes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs index 63c73e81e..3e00a9238 100644 --- a/fearless_simd_core/src/x86/crypto/gfni.rs +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -37,12 +37,15 @@ impl Debug for Gfni { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
unsafe impl TargetFeatureToken for Gfni { const FEATURES: &[&str] = &["gfni", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Gfni = self] => "gfni", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs index 5e5d3d4fa..722eb6db4 100644 --- a/fearless_simd_core/src/x86/crypto/kl.rs +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -37,12 +37,15 @@ impl Debug for Keylocker { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Keylocker { const FEATURES: &[&str] = &["kl"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Keylocker = self] => "kl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index 56d47f7ef..ca80c1413 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -37,12 +37,15 @@ impl Debug for Pclmulqdq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Pclmulqdq { const FEATURES: &[&str] = &["pclmulqdq", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Pclmulqdq = self] => "pclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 9003251da..46d05c8c6 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -37,12 +37,15 @@ impl Debug for Rdrand { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Rdrand { const FEATURES: &[&str] = &["rdrand"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Rdrand = self] => "rdrand", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index 26389a358..a4ba70f3e 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -37,12 +37,15 @@ impl Debug for Rdseed { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Rdseed { const FEATURES: &[&str] = &["rdseed"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Rdseed = self] => "rdseed", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs index 8c53b0011..3479ce3da 100644 --- a/fearless_simd_core/src/x86/crypto/sha.rs +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -37,12 +37,15 @@ impl Debug for Sha { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sha { const FEATURES: &[&str] = &["sha", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sha = self] => "sha", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs index 6968d4a45..818ef8849 100644 --- a/fearless_simd_core/src/x86/crypto/sha512.rs +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -37,6 +37,8 @@ impl Debug for Sha512 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sha512 { const FEATURES: &[&str] = &[ "sha512", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Sha512 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sha512 = self] => "sha512", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs index 3292d72d2..fff96832e 100644 --- a/fearless_simd_core/src/x86/crypto/sm3.rs +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -37,6 +37,8 @@ impl Debug for Sm3 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sm3 { const FEATURES: &[&str] = &[ "sm3", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Sm3 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sm3 = self] => "sm3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs index 81e2db9a4..66a48b3d7 100644 --- a/fearless_simd_core/src/x86/crypto/sm4.rs +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -37,6 +37,8 @@ impl Debug for Sm4 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sm4 { const FEATURES: &[&str] = &[ "sm4", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Sm4 { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sm4 = self] => "sm4", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs index fca0a918d..16ddb321e 100644 --- a/fearless_simd_core/src/x86/crypto/vaes.rs +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -37,6 +37,8 @@ impl Debug for Vaes { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Vaes { const FEATURES: &[&str] = &[ "vaes", "aes", "avx", "avx2", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Vaes { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Vaes = self] => "vaes", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs index d50b93f84..342af9d31 100644 --- a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -37,6 +37,8 @@ impl Debug for Vpclmulqdq { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Vpclmulqdq { const FEATURES: &[&str] = &[ "vpclmulqdq", @@ -52,7 +54,8 @@ unsafe impl TargetFeatureToken for Vpclmulqdq { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Vpclmulqdq = self] => "vpclmulqdq", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs index ff179dff7..a9601bde8 100644 --- a/fearless_simd_core/src/x86/crypto/widekl.rs +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -37,12 +37,15 @@ impl Debug for WideKeylocker { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for WideKeylocker { const FEATURES: &[&str] = &["widekl", "kl"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([WideKeylocker = self] => "widekl", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index e0d8bb5b9..572f756ef 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -37,12 +37,15 @@ impl Debug for Tbm { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Tbm { const FEATURES: &[&str] = &["tbm", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Tbm = self] => "tbm", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index ea94a80d6..bdea89070 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -18,6 +18,9 @@ pub mod avx512; pub mod crypto; pub mod discontinued; pub mod sse; +pub mod xsave; + pub mod v1; pub mod v2; pub mod v3; +pub mod v4; diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index fbc5493f0..a36ec7099 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -38,12 +38,15 @@ impl Debug for Fxsr { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Fxsr { const FEATURES: &[&str] = &["fxsr"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Fxsr = self] => "fxsr", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs index d3473a332..ad0205776 100644 --- a/fearless_simd_core/src/x86/sse/sse.rs +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -37,12 +37,15 @@ impl Debug for Sse { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse { const FEATURES: &[&str] = &["sse"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. 
+ // This means we can't use `Self = self` here, unfortunately. trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs index 137bf28c4..c86ce42fb 100644 --- a/fearless_simd_core/src/x86/sse/sse2.rs +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -37,12 +37,15 @@ impl Debug for Sse2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse2 { const FEATURES: &[&str] = &["sse2", "sse"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sse2 = self] => "sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs index 8c497ec37..27789c9ae 100644 --- a/fearless_simd_core/src/x86/sse/sse3.rs +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -37,12 +37,15 @@ impl Debug for Sse3 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse3 { const FEATURES: &[&str] = &["sse3", "sse", "sse2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sse3 = self] => "sse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs index 02ec84f6a..3f2b75bce 100644 --- a/fearless_simd_core/src/x86/sse/sse4_1.rs +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -37,12 +37,15 @@ impl Debug for Sse4_1 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse4_1 { const FEATURES: &[&str] = &["sse4.1", "sse", "sse2", "sse3", "ssse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Sse4_1 = self] => "sse4.1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs index cf18d18eb..0794c2e16 100644 --- a/fearless_simd_core/src/x86/sse/sse4_2.rs +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -37,12 +37,15 @@ impl Debug for Sse4_2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Sse4_2 { const FEATURES: &[&str] = &["sse4.2", "sse", "sse2", "sse3", "sse4.1", "ssse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Sse4_2 = self] => "sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs index 5ba3e9b54..e5a214b9e 100644 --- a/fearless_simd_core/src/x86/sse/ssse3.rs +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -37,12 +37,15 @@ impl Debug for SupplementalSse3 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for SupplementalSse3 { const FEATURES: &[&str] = &["ssse3", "sse", "sse2", "sse3"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([SupplementalSse3 = self] => "ssse3", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs new file mode 100644 index 000000000..58095f880 --- /dev/null +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -0,0 +1,119 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v1 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v1 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "fxsr,sse2")] +/// fn uses_x86_64_v1() { +/// // ... 
+/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V1 { + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v1::Fxsr, + /// The contained proof that SSE2 is available. + pub sse2: crate::x86::v1::Sse2, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V1 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v1 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V1 { + const FEATURES: &[&str] = &["fxsr", "sse", "sse2"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([crate::x86::v1::Fxsr = self.fxsr, crate::x86::v1::Sse2 = self.sse2] => "fxsr,sse2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V1 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v1 microarchitecture level is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + fxsr: crate::x86::v1::Fxsr::try_new()?, + sse2: crate::x86::v1::Sse2::try_new()?, + }) + } + + #[target_feature(enable = "fxsr,sse2")] + /// Create a new token for the x86-64-v1 microarchitecture level.
+ /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v1 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "fxsr,sse2" target features are available. + pub fn new() -> Self { + Self { + fxsr: crate::x86::v1::Fxsr::new(), + sse2: crate::x86::v1::Sse2::new(), + } + } +} + +impl From for crate::x86::v1::Fxsr { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Fxsr { crate::x86::v1::Fxsr::new() }) + } +} + +impl From for crate::x86::v1::Sse { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Sse { crate::x86::v1::Sse::new() }) + } +} + +impl From for crate::x86::v1::Sse2 { + fn from(value: V1) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V1 = value] => "fxsr,sse2", fn() -> crate::x86::v1::Sse2 { crate::x86::v1::Sse2::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 140fdd549..1a6309e0d 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -4,32 +4,7 @@ pub use crate::x86::sse::Fxsr; pub use crate::x86::sse::Sse; +pub use crate::x86::sse::Sse2; -/// A token that the current CPU is on the x86-64-v1 microarchitecture level.
-// TODO: (This is currently incomplete) -pub struct V1 { - pub sse: Sse, - pub fxsr: Fxsr, -} - -impl V1 { - /// Create a new token if the current CPU is at the x86-64-v1 microarchitecture level or better. - /// - /// This does not do any caching internally, although note that the standard - /// library does internally cache the features it detects. - #[cfg(feature = "std")] - pub fn try_new() -> Option { - // TODO: Caching - Some(Self { - fxsr: Fxsr::try_new()?, - sse: Sse::try_new()?, - }) - } -} - -const _: () = { - assert!( - core::mem::size_of::() == 0, - "Target feature tokens should be zero sized." - ); -}; +mod level; +pub use level::V1; diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index b831e3492..42415dc41 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -37,12 +37,15 @@ impl Debug for Cmpxchg16b { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target features are enabled. unsafe impl TargetFeatureToken for Cmpxchg16b { const FEATURES: &[&str] = &["cmpxchg16b"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Cmpxchg16b = self] => "cmpxchg16b", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs new file mode 100644 index 000000000..cc64c315c --- /dev/null +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -0,0 +1,179 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v2 microarchitecture level.
+ +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v2 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "cmpxchg16b,fxsr,popcnt,sse4.2")] +/// fn uses_x86_64_v2() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V2 { + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v2::Cmpxchg16b, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v2::Fxsr, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v2::Popcnt, + /// The contained proof that SSE4.2 is available. + pub sse4_2: crate::x86::v2::Sse4_2, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V2 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v2 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. 
+unsafe impl TargetFeatureToken for V2 { + const FEATURES: &[&str] = &[ + "cmpxchg16b", + "fxsr", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + ]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([crate::x86::v2::Cmpxchg16b = self.cmpxchg16b, crate::x86::v2::Fxsr = self.fxsr, crate::x86::v2::Popcnt = self.popcnt, crate::x86::v2::Sse4_2 = self.sse4_2] => "cmpxchg16b,fxsr,popcnt,sse4.2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V2 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v2 microarchitecture level is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option<Self> { + Some(Self { + cmpxchg16b: crate::x86::v2::Cmpxchg16b::try_new()?, + fxsr: crate::x86::v2::Fxsr::try_new()?, + popcnt: crate::x86::v2::Popcnt::try_new()?, + sse4_2: crate::x86::v2::Sse4_2::try_new()?, + }) + } + + #[target_feature(enable = "cmpxchg16b,fxsr,popcnt,sse4.2")] + /// Create a new token for the x86-64-v2 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v2 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "cmpxchg16b,fxsr,popcnt,sse4.2" target features are available.
+ pub fn new() -> Self { + Self { + cmpxchg16b: crate::x86::v2::Cmpxchg16b::new(), + fxsr: crate::x86::v2::Fxsr::new(), + popcnt: crate::x86::v2::Popcnt::new(), + sse4_2: crate::x86::v2::Sse4_2::new(), + } + } +} + +impl From<V2> for crate::x86::v2::Cmpxchg16b { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Cmpxchg16b { crate::x86::v2::Cmpxchg16b::new() }) + } +} + +impl From<V2> for crate::x86::v2::Fxsr { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Fxsr { crate::x86::v2::Fxsr::new() }) + } +} + +impl From<V2> for crate::x86::v2::Popcnt { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Popcnt { crate::x86::v2::Popcnt::new() }) + } +} + +impl From<V2> for crate::x86::v2::Sse { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse { crate::x86::v2::Sse::new() }) + } +} + +impl From<V2> for crate::x86::v2::Sse2 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse2 { crate::x86::v2::Sse2::new() }) + } +} + +impl From<V2> for crate::x86::v2::Sse3 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features.
+ trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse3 { crate::x86::v2::Sse3::new() }) + } +} + +impl From<V2> for crate::x86::v2::Sse4_1 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse4_1 { crate::x86::v2::Sse4_1::new() }) + } +} + +impl From<V2> for crate::x86::v2::Sse4_2 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::Sse4_2 { crate::x86::v2::Sse4_2::new() }) + } +} + +impl From<V2> for crate::x86::v2::SupplementalSse3 { + fn from(value: V2) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V2 = value] => "cmpxchg16b,fxsr,popcnt,sse4.2", fn() -> crate::x86::v2::SupplementalSse3 { crate::x86::v2::SupplementalSse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::<V2>() == 0, + "Target feature tokens should be zero sized." + ); +}; diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs index 197738fea..60c6a6516 100644 --- a/fearless_simd_core/src/x86/v2/mod.rs +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -1,5 +1,17 @@ +pub use crate::x86::sse::Sse3; +pub use crate::x86::sse::Sse4_1; +pub use crate::x86::sse::Sse4_2; +pub use crate::x86::sse::SupplementalSse3; +// TODO: Do we actually want to re-export from the previous level here?
+pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; + mod cmpxchg16b; pub use cmpxchg16b::Cmpxchg16b; mod popcnt; pub use popcnt::Popcnt; + +mod level; +pub use level::V2; diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index 5e78adcef..e5666b6a5 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -37,12 +37,15 @@ impl Debug for Popcnt { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Popcnt { const FEATURES: &[&str] = &["popcnt"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Popcnt = self] => "popcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs index a8cf99d76..92362dbe9 100644 --- a/fearless_simd_core/src/x86/v3/bmi1.rs +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -37,12 +37,15 @@ impl Debug for Bmi1 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Bmi1 { const FEATURES: &[&str] = &["bmi1"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Bmi1 = self] => "bmi1", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs index 82502ff8d..55b97dccc 100644 --- a/fearless_simd_core/src/x86/v3/bmi2.rs +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -37,12 +37,15 @@ impl Debug for Bmi2 { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Bmi2 { const FEATURES: &[&str] = &["bmi2"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Bmi2 = self] => "bmi2", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs index a1bc72683..aacb30e90 100644 --- a/fearless_simd_core/src/x86/v3/f16c.rs +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -37,6 +37,8 @@ impl Debug for F16c { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for F16c { const FEATURES: &[&str] = &[ "f16c", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for F16c { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([F16c = self] => "f16c", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs index dc232d73e..e051013bd 100644 --- a/fearless_simd_core/src/x86/v3/fma.rs +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -37,6 +37,8 @@ impl Debug for Fma { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Fma { const FEATURES: &[&str] = &[ "fma", "avx", "sse", "sse2", "sse3", "sse4.1", "sse4.2", "ssse3", @@ -44,7 +46,8 @@ unsafe impl TargetFeatureToken for Fma { #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Fma = self] => "fma", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs new file mode 100644 index 000000000..53ce8485b --- /dev/null +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -0,0 +1,279 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v3 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v3 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . 
+/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave")] +/// fn uses_x86_64_v3() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the `From` impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V3 { + /// The contained proof that AVX2 is available. + pub avx2: crate::x86::v3::Avx2, + /// The contained proof that BMI1 is available. + pub bmi1: crate::x86::v3::Bmi1, + /// The contained proof that BMI2 is available. + pub bmi2: crate::x86::v3::Bmi2, + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v3::Cmpxchg16b, + /// The contained proof that F16C is available. + pub f16c: crate::x86::v3::F16c, + /// The contained proof that FMA3 is available. + pub fma: crate::x86::v3::Fma, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v3::Fxsr, + /// The contained proof that `lzcnt` is available. + pub lzcnt: crate::x86::v3::Lzcnt, + /// The contained proof that `movbe` is available. + pub movbe: crate::x86::v3::Movbe, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v3::Popcnt, + /// The contained proof that `xsave` is available. + pub xsave: crate::x86::v3::Xsave, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields.
+} + +impl Debug for V3 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v3 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V3 { + const FEATURES: &[&str] = &[ + "avx", + "avx2", + "bmi1", + "bmi2", + "cmpxchg16b", + "f16c", + "fma", + "fxsr", + "lzcnt", + "movbe", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + "xsave", + ]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string. + trampoline!([crate::x86::v3::Avx2 = self.avx2, crate::x86::v3::Bmi1 = self.bmi1, crate::x86::v3::Bmi2 = self.bmi2, crate::x86::v3::Cmpxchg16b = self.cmpxchg16b, crate::x86::v3::F16c = self.f16c, crate::x86::v3::Fma = self.fma, crate::x86::v3::Fxsr = self.fxsr, crate::x86::v3::Lzcnt = self.lzcnt, crate::x86::v3::Movbe = self.movbe, crate::x86::v3::Popcnt = self.popcnt, crate::x86::v3::Xsave = self.xsave] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V3 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v3 microarchitecture level is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var?
+ pub fn try_new() -> Option { + Some(Self { + avx2: crate::x86::v3::Avx2::try_new()?, + bmi1: crate::x86::v3::Bmi1::try_new()?, + bmi2: crate::x86::v3::Bmi2::try_new()?, + cmpxchg16b: crate::x86::v3::Cmpxchg16b::try_new()?, + f16c: crate::x86::v3::F16c::try_new()?, + fma: crate::x86::v3::Fma::try_new()?, + fxsr: crate::x86::v3::Fxsr::try_new()?, + lzcnt: crate::x86::v3::Lzcnt::try_new()?, + movbe: crate::x86::v3::Movbe::try_new()?, + popcnt: crate::x86::v3::Popcnt::try_new()?, + xsave: crate::x86::v3::Xsave::try_new()?, + }) + } + + #[target_feature(enable = "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave")] + /// Create a new token for the x86-64-v3 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v3 is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave" target feature is available. + pub fn new() -> Self { + Self { + avx2: crate::x86::v3::Avx2::new(), + bmi1: crate::x86::v3::Bmi1::new(), + bmi2: crate::x86::v3::Bmi2::new(), + cmpxchg16b: crate::x86::v3::Cmpxchg16b::new(), + f16c: crate::x86::v3::F16c::new(), + fma: crate::x86::v3::Fma::new(), + fxsr: crate::x86::v3::Fxsr::new(), + lzcnt: crate::x86::v3::Lzcnt::new(), + movbe: crate::x86::v3::Movbe::new(), + popcnt: crate::x86::v3::Popcnt::new(), + xsave: crate::x86::v3::Xsave::new(), + } + } +} + +impl From for crate::x86::v3::Avx { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Avx { crate::x86::v3::Avx::new() }) + } +} + +impl From for crate::x86::v3::Avx2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Avx2 { crate::x86::v3::Avx2::new() }) + } +} + +impl From for crate::x86::v3::Bmi1 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Bmi1 { crate::x86::v3::Bmi1::new() }) + } +} + +impl From for crate::x86::v3::Bmi2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Bmi2 { crate::x86::v3::Bmi2::new() }) + } +} + +impl From for crate::x86::v3::Cmpxchg16b { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Cmpxchg16b { crate::x86::v3::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v3::F16c { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::F16c { crate::x86::v3::F16c::new() }) + } +} + +impl From for crate::x86::v3::Fma { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Fma { crate::x86::v3::Fma::new() }) + } +} + +impl From for crate::x86::v3::Fxsr { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Fxsr { crate::x86::v3::Fxsr::new() }) + } +} + +impl From for crate::x86::v3::Lzcnt { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Lzcnt { crate::x86::v3::Lzcnt::new() }) + } +} + +impl From for crate::x86::v3::Movbe { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Movbe { crate::x86::v3::Movbe::new() }) + } +} + +impl From for crate::x86::v3::Popcnt { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Popcnt { crate::x86::v3::Popcnt::new() }) + } +} + +impl From for crate::x86::v3::Sse { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse { crate::x86::v3::Sse::new() }) + } +} + +impl From for crate::x86::v3::Sse2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse2 { crate::x86::v3::Sse2::new() }) + } +} + +impl From for crate::x86::v3::Sse3 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse3 { crate::x86::v3::Sse3::new() }) + } +} + +impl From for crate::x86::v3::Sse4_1 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse4_1 { crate::x86::v3::Sse4_1::new() }) + } +} + +impl From for crate::x86::v3::Sse4_2 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Sse4_2 { crate::x86::v3::Sse4_2::new() }) + } +} + +impl From for crate::x86::v3::SupplementalSse3 { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::SupplementalSse3 { crate::x86::v3::SupplementalSse3::new() }) + } +} + +impl From for crate::x86::v3::Xsave { + fn from(value: V3) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V3 = value] => "avx2,bmi1,bmi2,cmpxchg16b,f16c,fma,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v3::Xsave { crate::x86::v3::Xsave::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index 01ce658db..c92a0c9fa 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -37,12 +37,15 @@ impl Debug for Lzcnt { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Lzcnt { const FEATURES: &[&str] = &["lzcnt"]; #[inline(always)] fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses a constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Lzcnt = self] => "lzcnt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs index 16d577e30..8c158a702 100644 --- a/fearless_simd_core/src/x86/v3/mod.rs +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -1,3 +1,17 @@ +pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; +pub use crate::x86::v2::Cmpxchg16b; +pub use crate::x86::v2::Popcnt; +pub use crate::x86::v2::Sse3; +pub use crate::x86::v2::Sse4_1; +pub use crate::x86::v2::Sse4_2; +pub use crate::x86::v2::SupplementalSse3; + +pub use crate::x86::avx::Avx; +pub use crate::x86::avx::Avx2; +pub use crate::x86::xsave::Xsave; + mod bmi1; pub use bmi1::Bmi1; @@ -15,3 +29,6 @@ pub use lzcnt::Lzcnt; mod movbe; pub use movbe::Movbe; + +mod level; +pub use level::V3; diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index 6e542dc1e..cea0faf98 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -37,12 +37,15 @@ impl Debug for Movbe { } } +// Safety: This token can only be constructed if you have proof that the requisite +// target feature is
enabled. unsafe impl TargetFeatureToken for Movbe { const FEATURES: &[&str] = &["movbe"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Movbe = self] => "movbe", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs new file mode 100644 index 000000000..96250c584 --- /dev/null +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -0,0 +1,325 @@ +// This file is automatically generated by `fearless_simd_core_gen`. +// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The x86-64-v4 microarchitecture level. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +// TODO: Level specific docs? +/// A token indicating that the current CPU has the x86-64-v4 microarchitecture level. +/// +/// For more details on the microarchitecture levels, see +/// . +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave")] +/// fn uses_x86_64_v4() { +/// // ... +/// } +/// ``` +/// +/// This struct internally contains only the minimal features required to enable this level. +/// This is done to ensure that the fewest target features are checked. +/// However, it can be turned into any target feature it implies using the from impls. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct V4 { + /// The contained proof that AVX512-BW is available. + pub avx512bw: crate::x86::v4::Avx512bw, + /// The contained proof that AVX512-CD is available. 
+ pub avx512cd: crate::x86::v4::Avx512cd, + /// The contained proof that AVX512-DQ is available. + pub avx512dq: crate::x86::v4::Avx512dq, + /// The contained proof that AVX512-VL is available. + pub avx512vl: crate::x86::v4::Avx512vl, + /// The contained proof that BMI1 is available. + pub bmi1: crate::x86::v4::Bmi1, + /// The contained proof that BMI2 is available. + pub bmi2: crate::x86::v4::Bmi2, + /// The contained proof that `cmpxchg16b` is available. + pub cmpxchg16b: crate::x86::v4::Cmpxchg16b, + /// The contained proof that `fxsave + fxrstor` is available. + pub fxsr: crate::x86::v4::Fxsr, + /// The contained proof that `lzcnt` is available. + pub lzcnt: crate::x86::v4::Lzcnt, + /// The contained proof that `movbe` is available. + pub movbe: crate::x86::v4::Movbe, + /// The contained proof that `popcnt` is available. + pub popcnt: crate::x86::v4::Popcnt, + /// The contained proof that `xsave` is available. + pub xsave: crate::x86::v4::Xsave, + // This struct explicitly is not non_exhaustive, because it is + // completely safe to construct from the fields. +} + +impl Debug for V4 { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#"x86-64-v4 enabled."#) + } +} + +// Safety: This token can only be constructed if you have proofs that all the requisite +// target features are enabled. +unsafe impl TargetFeatureToken for V4 { + const FEATURES: &[&str] = &[ + "avx", + "avx2", + "avx512bw", + "avx512cd", + "avx512dq", + "avx512f", + "avx512vl", + "bmi1", + "bmi2", + "cmpxchg16b", + "f16c", + "fma", + "fxsr", + "lzcnt", + "movbe", + "popcnt", + "sse", + "sse2", + "sse3", + "sse4.1", + "sse4.2", + "ssse3", + "xsave", + ]; + + #[inline(always)] + fn vectorize<R>(self, f: impl FnOnce() -> R) -> R { + // We use the explicitly written out form here as validation that the set of + // features we've created correctly maps to the target feature string.
+ trampoline!([crate::x86::v4::Avx512bw = self.avx512bw, crate::x86::v4::Avx512cd = self.avx512cd, crate::x86::v4::Avx512dq = self.avx512dq, crate::x86::v4::Avx512vl = self.avx512vl, crate::x86::v4::Bmi1 = self.bmi1, crate::x86::v4::Bmi2 = self.bmi2, crate::x86::v4::Cmpxchg16b = self.cmpxchg16b, crate::x86::v4::Fxsr = self.fxsr, crate::x86::v4::Lzcnt = self.lzcnt, crate::x86::v4::Movbe = self.movbe, crate::x86::v4::Popcnt = self.popcnt, crate::x86::v4::Xsave = self.xsave] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl V4 { + #[cfg(feature = "std")] + /// Create a new token if the x86-64-v4 target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + Some(Self { + avx512bw: crate::x86::v4::Avx512bw::try_new()?, + avx512cd: crate::x86::v4::Avx512cd::try_new()?, + avx512dq: crate::x86::v4::Avx512dq::try_new()?, + avx512vl: crate::x86::v4::Avx512vl::try_new()?, + bmi1: crate::x86::v4::Bmi1::try_new()?, + bmi2: crate::x86::v4::Bmi2::try_new()?, + cmpxchg16b: crate::x86::v4::Cmpxchg16b::try_new()?, + fxsr: crate::x86::v4::Fxsr::try_new()?, + lzcnt: crate::x86::v4::Lzcnt::try_new()?, + movbe: crate::x86::v4::Movbe::try_new()?, + popcnt: crate::x86::v4::Popcnt::try_new()?, + xsave: crate::x86::v4::Xsave::try_new()?, + }) + } + + #[target_feature( + enable = "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave" + )] + /// Create a new token for the x86-64-v4 microarchitecture level. + /// + /// This method is useful to get a new token if you have an external proof that + /// x86-64-v4 is available. This could happen if you are in a target feature + /// function called by an external library user. 
+ /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave" target feature is available. + pub fn new() -> Self { + Self { + avx512bw: crate::x86::v4::Avx512bw::new(), + avx512cd: crate::x86::v4::Avx512cd::new(), + avx512dq: crate::x86::v4::Avx512dq::new(), + avx512vl: crate::x86::v4::Avx512vl::new(), + bmi1: crate::x86::v4::Bmi1::new(), + bmi2: crate::x86::v4::Bmi2::new(), + cmpxchg16b: crate::x86::v4::Cmpxchg16b::new(), + fxsr: crate::x86::v4::Fxsr::new(), + lzcnt: crate::x86::v4::Lzcnt::new(), + movbe: crate::x86::v4::Movbe::new(), + popcnt: crate::x86::v4::Popcnt::new(), + xsave: crate::x86::v4::Xsave::new(), + } + } +} + +impl From for crate::x86::v4::Avx { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx { crate::x86::v4::Avx::new() }) + } +} + +impl From for crate::x86::v4::Avx2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx2 { crate::x86::v4::Avx2::new() }) + } +} + +impl From for crate::x86::v4::Avx512bw { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512bw { crate::x86::v4::Avx512bw::new() }) + } +} + +impl From for crate::x86::v4::Avx512cd { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512cd { crate::x86::v4::Avx512cd::new() }) + } +} + +impl From for crate::x86::v4::Avx512dq { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512dq { crate::x86::v4::Avx512dq::new() }) + } +} + +impl From for crate::x86::v4::Avx512f { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512f { crate::x86::v4::Avx512f::new() }) + } +} + +impl From for crate::x86::v4::Avx512vl { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Avx512vl { crate::x86::v4::Avx512vl::new() }) + } +} + +impl From for crate::x86::v4::Bmi1 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Bmi1 { crate::x86::v4::Bmi1::new() }) + } +} + +impl From for crate::x86::v4::Bmi2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Bmi2 { crate::x86::v4::Bmi2::new() }) + } +} + +impl From for crate::x86::v4::Cmpxchg16b { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Cmpxchg16b { crate::x86::v4::Cmpxchg16b::new() }) + } +} + +impl From for crate::x86::v4::F16c { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::F16c { crate::x86::v4::F16c::new() }) + } +} + +impl From for crate::x86::v4::Fma { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Fma { crate::x86::v4::Fma::new() }) + } +} + +impl From for crate::x86::v4::Fxsr { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Fxsr { crate::x86::v4::Fxsr::new() }) + } +} + +impl From for crate::x86::v4::Lzcnt { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Lzcnt { crate::x86::v4::Lzcnt::new() }) + } +} + +impl From for crate::x86::v4::Movbe { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Movbe { crate::x86::v4::Movbe::new() }) + } +} + +impl From for crate::x86::v4::Popcnt { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Popcnt { crate::x86::v4::Popcnt::new() }) + } +} + +impl From for crate::x86::v4::Sse { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse { crate::x86::v4::Sse::new() }) + } +} + +impl From for crate::x86::v4::Sse2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse2 { crate::x86::v4::Sse2::new() }) + } +} + +impl From for crate::x86::v4::Sse3 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. 
+ trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse3 { crate::x86::v4::Sse3::new() }) + } +} + +impl From for crate::x86::v4::Sse4_1 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse4_1 { crate::x86::v4::Sse4_1::new() }) + } +} + +impl From for crate::x86::v4::Sse4_2 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Sse4_2 { crate::x86::v4::Sse4_2::new() }) + } +} + +impl From for crate::x86::v4::SupplementalSse3 { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::SupplementalSse3 { crate::x86::v4::SupplementalSse3::new() }) + } +} + +impl From for crate::x86::v4::Xsave { + fn from(value: V4) -> Self { + // This serves as a correctness check of the implicitly enabled features. + trampoline!([V4 = value] => "avx512bw,avx512cd,avx512dq,avx512vl,bmi1,bmi2,cmpxchg16b,fxsr,lzcnt,movbe,popcnt,xsave", fn() -> crate::x86::v4::Xsave { crate::x86::v4::Xsave::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; diff --git a/fearless_simd_core/src/x86/v4/mod.rs b/fearless_simd_core/src/x86/v4/mod.rs new file mode 100644 index 000000000..e401ecb2f --- /dev/null +++ b/fearless_simd_core/src/x86/v4/mod.rs @@ -0,0 +1,27 @@ +pub use crate::x86::v1::Fxsr; +pub use crate::x86::v1::Sse; +pub use crate::x86::v1::Sse2; +pub use crate::x86::v2::Cmpxchg16b; +pub use crate::x86::v2::Popcnt; +pub use crate::x86::v2::Sse3; +pub use crate::x86::v2::Sse4_1; +pub use crate::x86::v2::Sse4_2; +pub use crate::x86::v2::SupplementalSse3; +pub use crate::x86::v3::Avx; +pub use crate::x86::v3::Avx2; +pub use crate::x86::v3::Bmi1; +pub use crate::x86::v3::Bmi2; +pub use crate::x86::v3::F16c; +pub use crate::x86::v3::Fma; +pub use crate::x86::v3::Lzcnt; +pub use crate::x86::v3::Movbe; +pub use crate::x86::v3::Xsave; + +pub use crate::x86::avx512::Avx512bw; +pub use crate::x86::avx512::Avx512cd; +pub use crate::x86::avx512::Avx512dq; +pub use crate::x86::avx512::Avx512f; +pub use crate::x86::avx512::Avx512vl; + +mod level; +pub use level::V4; diff --git a/fearless_simd_core/src/x86/xsave/mod.rs b/fearless_simd_core/src/x86/xsave/mod.rs new file mode 100644 index 000000000..aee24a743 --- /dev/null +++ b/fearless_simd_core/src/x86/xsave/mod.rs @@ -0,0 +1,15 @@ +#[expect( + clippy::module_inception, + reason = "The inner module is automatically generated." +)] +mod xsave; +pub use xsave::Xsave; + +mod xsavec; +pub use xsavec::Xsavec; + +mod xsaveopt; +pub use xsaveopt::Xsaveopt; + +pub use xsaves::Xsaves; +mod xsaves; diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 9d2228673..3c57c87e6 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -37,12 +37,15 @@ impl Debug for Xsave { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. 
unsafe impl TargetFeatureToken for Xsave { const FEATURES: &[&str] = &["xsave", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Xsave = self] => "xsave", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index 6d11df470..b7e5393a6 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -37,12 +37,15 @@ impl Debug for Xsavec { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Xsavec { const FEATURES: &[&str] = &["xsavec", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Xsavec = self] => "xsavec", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index b63d444be..8efabdcae 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -37,12 +37,15 @@ impl Debug for Xsaveopt { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Xsaveopt { const FEATURES: &[&str] = &["xsaveopt", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. 
+ // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. trampoline!([Xsaveopt = self] => "xsaveopt", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index 60633befd..1a49e04c7 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -37,12 +37,15 @@ impl Debug for Xsaves { } } +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. unsafe impl TargetFeatureToken for Xsaves { const FEATURES: &[&str] = &["xsaves", ]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { - // Because we want this constant to be eagerly evaluated. + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. 
trampoline!([Xsaves = self] => "xsaves", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) } } From b40ee761e58c1ea762a434f2195012c7819345d1 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Wed, 15 Oct 2025 13:22:59 +0100 Subject: [PATCH 09/19] Re-run update --- fearless_simd_core/src/x86/v1/level.rs | 1 + fearless_simd_core/src/x86/v2/level.rs | 1 + fearless_simd_core/src/x86/v3/level.rs | 1 + fearless_simd_core/src/x86/v4/level.rs | 1 + fearless_simd_core/src/x86/xsave/xsave.rs | 3 +-- fearless_simd_core/src/x86/xsave/xsavec.rs | 3 +-- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 3 +-- fearless_simd_core/src/x86/xsave/xsaves.rs | 3 +-- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs index 58095f880..86fc61412 100644 --- a/fearless_simd_core/src/x86/v1/level.rs +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -89,6 +89,7 @@ impl V1 { } } } +// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v1::Fxsr { fn from(value: V1) -> Self { diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs index cc64c315c..5f01a232c 100644 --- a/fearless_simd_core/src/x86/v2/level.rs +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -107,6 +107,7 @@ impl V2 { } } } +// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v2::Cmpxchg16b { fn from(value: V2) -> Self { diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs index 53ce8485b..ad607a4f4 100644 --- a/fearless_simd_core/src/x86/v3/level.rs +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -144,6 +144,7 @@ impl V3 { } } } +// TODO: From impls to convert into lower x86 versions. 
impl From for crate::x86::v3::Avx { fn from(value: V3) -> Self { diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs index 96250c584..b3acc6f1c 100644 --- a/fearless_simd_core/src/x86/v4/level.rs +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -155,6 +155,7 @@ impl V4 { } } } +// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v4::Avx { fn from(value: V4) -> Self { diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 3c57c87e6..19c8f2f51 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -40,7 +40,7 @@ impl Debug for Xsave { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. unsafe impl TargetFeatureToken for Xsave { - const FEATURES: &[&str] = &["xsave", ]; + const FEATURES: &[&str] = &["xsave"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsave { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index b7e5393a6..df8033d1f 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -40,7 +40,7 @@ impl Debug for Xsavec { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. 
unsafe impl TargetFeatureToken for Xsavec { - const FEATURES: &[&str] = &["xsavec", ]; + const FEATURES: &[&str] = &["xsavec"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsavec { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 8efabdcae..563c26c56 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -40,7 +40,7 @@ impl Debug for Xsaveopt { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. unsafe impl TargetFeatureToken for Xsaveopt { - const FEATURES: &[&str] = &["xsaveopt", ]; + const FEATURES: &[&str] = &["xsaveopt"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsaveopt { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index 1a49e04c7..8f365d8cd 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -40,7 +40,7 @@ impl Debug for Xsaves { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. 
unsafe impl TargetFeatureToken for Xsaves { - const FEATURES: &[&str] = &["xsaves", ]; + const FEATURES: &[&str] = &["xsaves"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -87,7 +87,6 @@ impl Xsaves { } } - const _: () = { assert!( core::mem::size_of::() == 0, From 7f52b7bc9d27c133ccd885bc323b830d9743dbe1 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 10:16:09 +0100 Subject: [PATCH 10/19] Bump MSRV to allow avx512 support --- .github/workflows/ci.yml | 4 ++-- CHANGELOG.md | 2 +- Cargo.toml | 2 +- README.md | 2 +- fearless_simd/README.md | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a6a3d50fe..ffeb4b90b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,12 +3,12 @@ env: # version like 1.70. Note that we only specify MAJOR.MINOR and not PATCH so that bugfixes still # come automatically. If the version specified here is no longer the latest stable version, # then please feel free to submit a PR that adjusts it along with the potential clippy fixes. - RUST_STABLE_VER: "1.88" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 + RUST_STABLE_VER: "1.90" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 # The purpose of checking with the minimum supported Rust toolchain is to detect its staleness. # If the compilation fails, then the version specified here needs to be bumped up to reality. # Be sure to also update the rust-version property in the workspace Cargo.toml file, # plus all the README.md files of the affected packages. - RUST_MIN_VER: "1.88" + RUST_MIN_VER: "1.89" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. 
RUST_MIN_VER_PKGS: "-p fearless_simd" diff --git a/CHANGELOG.md b/CHANGELOG.md index 14c1fb20f..db6d6a965 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ You can find its changes [documented below](#030-2025-10-14). ## [Unreleased] -This release has an [MSRV][] of 1.88. +This release has an [MSRV][] of 1.89. ### Added diff --git a/Cargo.toml b/Cargo.toml index e84d0a2ee..b1f1ab457 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. -rust-version = "1.88" +rust-version = "1.89" [workspace.lints] diff --git a/README.md b/README.md index 7c3d95fe2..4749d6f9f 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd/README.md b/fearless_simd/README.md index 160ce8421..953e48278 100644 --- a/fearless_simd/README.md +++ b/fearless_simd/README.md @@ -139,7 +139,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`. ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.88** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. Future versions of Fearless SIMD might increase the Rust version requirement. 
It will not be treated as a breaking change and as such can even happen with small patch releases. From 6da5c54e17c3c4a5312509fbbb9b41088d8fe1f5 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:04:55 +0100 Subject: [PATCH 11/19] Misc cleanups to get ready to launch --- .clippy.toml | 2 + .github/workflows/ci.yml | 2 +- Cargo.lock | 4 +- fearless_simd_core/Cargo.toml | 5 +- fearless_simd_core/LICENSE-APACHE | 176 +++++++++++++++++++++++++ fearless_simd_core/LICENSE-MIT | 25 ++++ fearless_simd_core/README.md | 105 +++++++++++++++ fearless_simd_core/gen/Cargo.toml | 9 +- fearless_simd_core/gen/src/data/x86.rs | 6 +- fearless_simd_core/src/lib.rs | 53 +++++++- fearless_simd_core/src/x86/mod.rs | 5 + fearless_simd_gen/src/mk_simd_types.rs | 45 ++++--- 12 files changed, 404 insertions(+), 33 deletions(-) create mode 100644 fearless_simd_core/LICENSE-APACHE create mode 100644 fearless_simd_core/LICENSE-MIT create mode 100644 fearless_simd_core/README.md diff --git a/.clippy.toml b/.clippy.toml index 4781d68cb..898218354 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -8,3 +8,5 @@ trivial-copy-size-limit = 16 # END LINEBENDER LINT SET + +doc-valid-idents = ["ShangMi", ".."] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ffeb4b90b..5c1919d0d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ env: RUST_MIN_VER: "1.89" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. - RUST_MIN_VER_PKGS: "-p fearless_simd" + RUST_MIN_VER_PKGS: "-p fearless_simd -p fearless_simd_core" # List of features that depend on the standard library and will be excluded from no_std checks. FEATURES_DEPENDING_ON_STD: "std,default" # List of packages that can not target Wasm. 
diff --git a/Cargo.lock b/Cargo.lock index 672a09133..ce10218f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -135,14 +135,14 @@ dependencies = [ [[package]] name = "fearless_simd_core" -version = "0.1.0" +version = "0.3.0" dependencies = [ "bytemuck", ] [[package]] name = "fearless_simd_core_gen" -version = "0.1.0" +version = "0.0.0" [[package]] name = "fearless_simd_dev_macros" diff --git a/fearless_simd_core/Cargo.toml b/fearless_simd_core/Cargo.toml index e16a9823d..1342c2184 100644 --- a/fearless_simd_core/Cargo.toml +++ b/fearless_simd_core/Cargo.toml @@ -1,6 +1,9 @@ [package] name = "fearless_simd_core" -version = "0.1.0" +version = "0.3.0" +description = "Safely run custom #[target_feature] functions" +keywords = ["simd", "target_feature"] +categories = ["hardware-support"] edition.workspace = true license.workspace = true repository.workspace = true diff --git a/fearless_simd_core/LICENSE-APACHE b/fearless_simd_core/LICENSE-APACHE new file mode 100644 index 000000000..d9a10c0d8 --- /dev/null +++ b/fearless_simd_core/LICENSE-APACHE @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS diff --git a/fearless_simd_core/LICENSE-MIT b/fearless_simd_core/LICENSE-MIT new file mode 100644 index 000000000..f3d84348c --- /dev/null +++ b/fearless_simd_core/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 Raph Levien + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/fearless_simd_core/README.md b/fearless_simd_core/README.md new file mode 100644 index 000000000..45052d288 --- /dev/null +++ b/fearless_simd_core/README.md @@ -0,0 +1,105 @@ +
+ +# Fearless SIMD Core + +**Target Features in Rust's type system** + +[![Latest published version.](https://img.shields.io/crates/v/fearless_simd.svg)](https://crates.io/crates/fearless_simd) +[![Documentation build status.](https://img.shields.io/docsrs/fearless_simd.svg)](https://docs.rs/fearless_simd) +[![Apache 2.0 or MIT license.](https://img.shields.io/badge/license-Apache--2.0_OR_MIT-blue.svg)](#license) +\ +[![Linebender Zulip, #simd channel.](https://img.shields.io/badge/Linebender-%23simd-blue?logo=Zulip)](https://xi.zulipchat.com/#narrow/channel/514230-simd) +[![GitHub Actions CI status.](https://img.shields.io/github/actions/workflow/status/linebender/fearless_simd/ci.yml?logo=github&label=CI)](https://github.com/linebender/fearless_simd/actions) +[![Dependency staleness status.](https://deps.rs/crate/fearless_simd/latest/status.svg)](https://deps.rs/crate/fearless_simd/) + +
+ + + + + + + +An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. + +This crate introduces the [`trampoline!`] macro, which allows running code in a +statically validated `#[target_feature(enable="some_features")]` environment, based on +externally provided tokens. +This abstraction is designed to be combined with target features 1.1, the recent update +in the Rust compiler to allow calling `#[target_feature]` functions safely from within +other `#[target_feature]` functions. +As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. + +This crate also has modules which contain tokens for each Rust target features. +These allow safely validating that a target feature is available, and obtaining a token. +These are grouped by architecture: + +- [`x86`] contains the tokens for both the x86 and x86-64 targets. + It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details. + + +# Examples + +At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using +only the standard library. +These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. + + + +Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. +This is also the case for WASM with `wasm_simd`, but note that this crate +[isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to +call `#[target_features]` on that platform. + +# Crate Feature Flags + + + +# Implementation + +The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait, +which indicates that a value of that token is only possible to construct if the set +of target features it specifies are enabled. +This means that the macro can use the existence of these token values as +safety proofs that calling a function with those target features is safe. 
+ +This safety proof happens entirely in const evaluation, so if there's a mistake with the +proof, it will cause a compilation error. +The code generated by this macro is thus a function containing the provided code, marked +with `#[target_feature]`, and a call to this newly generated function. + +[attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm + + + +## Minimum supported Rust Version (MSRV) + +This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. + +Future versions of Fearless SIMD might increase the Rust version requirement. +It will not be treated as a breaking change and as such can even happen with small patch releases. + +## Community + +[![Linebender Zulip, #simd channel.](https://img.shields.io/badge/Linebender-%23simd-blue?logo=Zulip)](https://xi.zulipchat.com/#narrow/channel/514230-simd) + +Discussion of Fearless SIMD development happens in the [Linebender Zulip](https://xi.zulipchat.com/), specifically in [#simd](https://xi.zulipchat.com/#narrow/channel/514230-simd). +All public content can be read without logging in. + +Contributions are welcome by pull request. +The [Rust code of conduct] applies. + +## License + +Licensed under either of + +- Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or ) +- MIT license ([LICENSE-MIT](LICENSE-MIT) or ) + +at your option. + +[Rust Code of Conduct]: https://www.rust-lang.org/policies/code-of-conduct diff --git a/fearless_simd_core/gen/Cargo.toml b/fearless_simd_core/gen/Cargo.toml index 5617f7be1..65644e6ef 100644 --- a/fearless_simd_core/gen/Cargo.toml +++ b/fearless_simd_core/gen/Cargo.toml @@ -1,6 +1,11 @@ [package] name = "fearless_simd_core_gen" -version = "0.1.0" -edition = "2024" +description = "Internal code generator for the Fearless SIMD Core crate." 
+edition.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +publish = false [dependencies] diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index e8c5f85c1..b6719c22e 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -431,7 +431,7 @@ pub(crate) const X86_LEVEL_TEMPLATE: &str = include_str!("../../templates/x86_le // The first four are all assumed, and the final is not implemented because // it's practically impossible to use correctly (and there's no reason to). pub(crate) const X86_V1: &[&str] = &["fxsr", "sse", "sse2"]; -/// The target features required in the x86-64-v1 level, in addition to those already in [`V1`]. +/// The target features required in the x86-64-v2 level, in addition to those already in [`X86_V1`]. pub(crate) const X86_V2: &[&str] = &[ "sse3", "ssse3", @@ -442,9 +442,9 @@ pub(crate) const X86_V2: &[&str] = &[ // The lahfahf target feature is currently in Rust beta. // "lahfsahf", ]; -/// The target features required in the x86-64-v3 level, excluding those already in [`V2`]. +/// The target features required in the x86-64-v3 level, excluding those already in [`X86_V2`]. pub(crate) const X86_V3: &[&str] = &[ "avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave", ]; -/// The target features required in the x86-64-v4 level, excluding those already in [`V3`]. +/// The target features required in the x86-64-v4 level, excluding those already in [`X86_V3`]. pub(crate) const X86_V4: &[&str] = &["avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"]; diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 75bc9acb2..67ce0e0a4 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -1,4 +1,52 @@ -//! Tooling for Rust's target features. +//! An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. +//! 
+//! This crate introduces the [`trampoline!`] macro, which allows running code in a +//! statically validated `#[target_feature(enable="some_features")]` environment, based on +//! externally provided tokens. +//! This abstraction is designed to be combined with target features 1.1, the recent update +//! in the Rust compiler to allow calling `#[target_feature]` functions safely from within +//! other `#[target_feature]` functions. +//! As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. +//! +//! This crate also has modules which contain tokens for each Rust target features. +//! These allow safely validating that a target feature is available, and obtaining a token. +//! These are grouped by architecture: +//! +//! - [`x86`] contains the tokens for both the x86 and x86-64 targets. +//! It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details. +//! +//! +//! # Examples +//! +//! At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using +//! only the standard library. +//! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. +//! +//! +//! +//! Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. +//! This is also the case for WASM with `wasm_simd`, but note that this crate +//! [isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to +//! call `#[target_features]` on that platform. +//! +//! # Crate Feature Flags +//! +//! +//! +//! # Implementation +//! +//! The tokens provided to [`trampoline!`] implement the [`TargetFeatureToken`] trait, +//! which indicates that a value of that token is only possible to construct if the set +//! of target features it specifies are enabled. +//! This means that the macro can use the existence of these token values as +//! safety proofs that calling a function with those target features is safe. +//! +//! 
This safety proof happens entirely in const evaluation, so if there's a mistake with the +//! proof, it will cause a compilation error. +//! The code generated by this macro is thus a function containing the provided code, marked +//! with `#[target_feature]`, and a call to this newly generated function. +//! +//! [attributes.codegen.target_feature.wasm]: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.wasm // LINEBENDER LINT SET - lib.rs - v4 // See https://linebender.org/wiki/canonical-lints/ @@ -229,6 +277,9 @@ macro_rules! trampoline { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(test)] mod example_expansion { + #[cfg(target_arch = "x86")] + use core::arch::x86::{__m128, _mm_mul_ps}; + #[cfg(target_arch = "x86_64")] use core::arch::x86_64::{__m128, _mm_mul_ps}; use crate::x86::{self, v1::Sse}; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index bdea89070..77ec914b0 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -24,3 +24,8 @@ pub mod v1; pub mod v2; pub mod v3; pub mod v4; + +pub use v1::V1; +pub use v2::V2; +pub use v3::V3; +pub use v4::V4; diff --git a/fearless_simd_gen/src/mk_simd_types.rs b/fearless_simd_gen/src/mk_simd_types.rs index b1935285f..ee71e374d 100644 --- a/fearless_simd_gen/src/mk_simd_types.rs +++ b/fearless_simd_gen/src/mk_simd_types.rs @@ -207,29 +207,28 @@ fn simd_impl(ty: &VecType) -> TokenStream { | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) | OpSig::Shift - ) { - if let Some(args) = sig.vec_trait_args() { - let ret_ty = sig.ret_ty(ty, TyFlavor::VecImpl); - let call_args = match sig { - OpSig::Unary | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) => quote! { self }, - OpSig::Binary | OpSig::Compare | OpSig::Combine => { - quote! { self, rhs.simd_into(self.simd) } - } - OpSig::Shift => { - quote! { self, shift } - } - OpSig::Ternary => { - quote! 
{ self, op1.simd_into(self.simd), op2.simd_into(self.simd) } - } - _ => quote! { todo!() }, - }; - methods.push(quote! { - #[inline(always)] - pub fn #method_name(#args) -> #ret_ty { - self.simd.#trait_method(#call_args) - } - }); - } + ) && let Some(args) = sig.vec_trait_args() + { + let ret_ty = sig.ret_ty(ty, TyFlavor::VecImpl); + let call_args = match sig { + OpSig::Unary | OpSig::Cvt(_, _) | OpSig::Reinterpret(_, _) => quote! { self }, + OpSig::Binary | OpSig::Compare | OpSig::Combine => { + quote! { self, rhs.simd_into(self.simd) } + } + OpSig::Shift => { + quote! { self, shift } + } + OpSig::Ternary => { + quote! { self, op1.simd_into(self.simd), op2.simd_into(self.simd) } + } + _ => quote! { todo!() }, + }; + methods.push(quote! { + #[inline(always)] + pub fn #method_name(#args) -> #ret_ty { + self.simd.#trait_method(#call_args) + } + }); } } let vec_impl = simd_vec_impl(ty); From 4b1ae94870afb7c916f342cf87dd726d07604737 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:23:49 +0100 Subject: [PATCH 12/19] Handle sse4a and tbm consistently --- Cargo.toml | 1 + fearless_simd_core/gen/src/data/x86.rs | 15 ++- .../src/x86/discontinued/mod.rs | 2 +- fearless_simd_core/src/x86/sse/mod.rs | 4 + fearless_simd_core/src/x86/sse/sse4a.rs | 116 ++++++++++++++++++ 5 files changed, 129 insertions(+), 9 deletions(-) create mode 100644 fearless_simd_core/src/x86/sse/sse4a.rs diff --git a/Cargo.toml b/Cargo.toml index b1f1ab457..eb1d32831 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. +# When increasing past 1.91, also uncomment the `discontinued::tbm` and `sse::sse4a` modules/imports in Fearless SIMD Core. 
rust-version = "1.89" [workspace.lints] diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index b6719c22e..6740063db 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -349,14 +349,13 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ struct sse::Sse4_2("SSE4.2"): "sse4.2" + ["sse4.1"] fn uses_sse4 ), - // // TODO: This only exists from 1.91 and above (current beta) - // f!( - // /// [SSE4a] --- StreamingSIMDExtensions 4a - - // /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a - // struct Sse4a("SSE4a"): "sse4a" + ["sse3"] - // fn uses_sse4a - // ), + f!( + /// [SSE4a] --- StreamingSIMDExtensions 4a + /// + /// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a + struct sse::Sse4a("SSE4a"): "sse4a" + ["sse3"] + fn uses_sse4a + ), f!( /// [SSSE3] --- Supplemental StreamingSIMDExtensions 3 /// diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index 9afa91dc7..7869229d5 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -1,3 +1,3 @@ -// Stable in beta, but not current stable +// These will be stabilised in 1.91. // mod tbm; // pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index ce9a3aeca..0470258b2 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -17,6 +17,10 @@ pub use sse3::Sse3; mod ssse3; pub use ssse3::SupplementalSse3; +// These will be stabilised in 1.91. +// mod sse4a; +// pub use sse4a::Sse4a; + mod sse4_1; pub use sse4_1::Sse4_1; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs new file mode 100644 index 000000000..b0475fad3 --- /dev/null +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -0,0 +1,116 @@ +// This file is automatically generated by `fearless_simd_core_gen`. 
+// Its template can be found in `fearless_simd_core/gen/templates`. + +//! The SSE4a target feature. + +use crate::{TargetFeatureToken, trampoline}; + +use core::fmt::Debug; + +/// [SSE4a] --- StreamingSIMDExtensions 4a +/// +/// [SSE4a]: https://en.wikipedia.org/wiki/SSE4#SSE4a +/// +/// A token indicating that the current CPU has the `sse4a` target feature. +/// +/// # Example +/// +/// This can be used to [`trampoline!`] into functions like: +/// +/// ```rust +/// #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +/// #[target_feature(enable = "sse4a")] +/// fn uses_sse4a() { +/// // ... +/// } +/// ``` +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Sse4a { + // We don't use non_exhaustive because we don't want this struct to be constructible. + // in different modules in this crate. + _private: (), +} + +impl Debug for Sse4a { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, r#""sse4a" enabled."#) + } +} + +// Safety: This token can only be constructed if you have proof that all the requisite +// target feature is enabled. +unsafe impl TargetFeatureToken for Sse4a { + const FEATURES: &[&str] = &["sse4a", "sse", "sse2", "sse3"]; + + #[inline(always)] + fn vectorize(self, f: impl FnOnce() -> R) -> R { + // Because we need the safety check to be eagerly evaluated, it uses an constant item. + // This means we can't use `Self = self` here, unfortunately. + trampoline!([Sse4a = self] => "sse4a", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) + } +} + +impl Sse4a { + #[cfg(feature = "std")] + /// Create a new token if the `"sse4a"` target feature is detected as enabled. + /// + /// This does not do any caching internally, although note that the standard + /// library does internally cache the features it detects. + // TODO: Consider a manual override feature/env var? + pub fn try_new() -> Option { + // Feature flag required to make docs compile. 
+ // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if std::arch::is_x86_feature_detected!("sse4a") { + // Safety: The required CPU feature was detected. + unsafe { Some(Self::new()) } + } else { + None + } + } + + #[target_feature(enable = "sse4a")] + /// Create a new token for the "sse4a" target feature. + /// + /// This method is useful to get a new token if you have an external proof that + /// SSE4a is available. This could happen if you are in a target feature + /// function called by an external library user. + /// + /// # Safety + /// + /// No conditions other than those inherited from the target feature attribute, + /// i.e. that the "sse4a" target feature is available. + /// + /// [implicitly enables]: https://doc.rust-lang.org/beta/reference/attributes/codegen.html?highlight=implicitly%20enabled#r-attributes.codegen.target_feature.safety-restrictions + pub fn new() -> Self { + Self { _private: () } + } +} + +impl From for crate::x86::sse::Sse { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse { crate::x86::sse::Sse::new() }) + } +} + +impl From for crate::x86::sse::Sse2 { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse2 { crate::x86::sse::Sse2::new() }) + } +} + +impl From for crate::x86::sse::Sse3 { + fn from(value: Sse4a) -> Self { + // This also serves as a correctness check of the implicitly enabled features. + trampoline!([Sse4a = value] => "sse4a", fn() -> crate::x86::sse::Sse3 { crate::x86::sse::Sse3::new() }) + } +} + +const _: () = { + assert!( + core::mem::size_of::() == 0, + "Target feature tokens should be zero sized." 
+ ); +}; From dc9053938fbadcf8a7176d5868a0bf9a75859e44 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:54:32 +0100 Subject: [PATCH 13/19] Add the final missing docs --- fearless_simd_core/gen/src/data/x86.rs | 48 +++++++++---------- fearless_simd_core/src/lib.rs | 4 +- fearless_simd_core/src/x86/adx/mod.rs | 2 +- fearless_simd_core/src/x86/avx/mod.rs | 7 +++ fearless_simd_core/src/x86/avx512/mod.rs | 6 +++ fearless_simd_core/src/x86/crypto/mod.rs | 4 ++ .../src/x86/crypto/pclmulqdq.rs | 4 +- fearless_simd_core/src/x86/crypto/rdrand.rs | 4 +- fearless_simd_core/src/x86/crypto/rdseed.rs | 4 +- .../src/x86/discontinued/mod.rs | 7 +++ fearless_simd_core/src/x86/mod.rs | 21 +++++--- fearless_simd_core/src/x86/sse/fxsr.rs | 6 +-- fearless_simd_core/src/x86/sse/mod.rs | 9 ++++ fearless_simd_core/src/x86/sse/sse4a.rs | 1 + fearless_simd_core/src/x86/v1/mod.rs | 7 ++- fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 4 +- fearless_simd_core/src/x86/v2/mod.rs | 9 +++- fearless_simd_core/src/x86/v2/popcnt.rs | 4 +- fearless_simd_core/src/x86/v3/lzcnt.rs | 4 +- fearless_simd_core/src/x86/v3/mod.rs | 8 ++++ fearless_simd_core/src/x86/v3/movbe.rs | 4 +- fearless_simd_core/src/x86/v4/mod.rs | 8 ++++ fearless_simd_core/src/x86/xsave/mod.rs | 2 + fearless_simd_core/src/x86/xsave/xsave.rs | 2 +- fearless_simd_core/src/x86/xsave/xsavec.rs | 4 +- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 4 +- fearless_simd_core/src/x86/xsave/xsaves.rs | 4 +- 27 files changed, 131 insertions(+), 60 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 6740063db..e2b1e9f21 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -202,9 +202,9 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ fn uses_bmi2 ), f!( - /// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically + /// [`cmpxchg16b`] --- Compares and 
exchange 16 bytes (128 bits) of data atomically /// - /// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b + /// [`cmpxchg16b`]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b struct v2::Cmpxchg16b("`cmpxchg16b`"): "cmpxchg16b" + [] fn uses_cmpxchg16b ), @@ -223,10 +223,10 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ fn uses_fma ), f!( - /// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State + /// [`fxsave`] and [`fxrstor`] --- Save and restore x87 FPU, MMX Technology, and SSE State /// - /// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave, - /// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor, + /// [`fxsave`]: https://www.felixcloutier.com/x86/fxsave, + /// [`fxrstor`]: https://www.felixcloutier.com/x86/fxrstor, struct sse::Fxsr("`fxsave + fxrstor`"): "fxsr" + [] fn uses_fxsr ), @@ -245,44 +245,44 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ fn uses_keylocker ), f!( - /// ["lzcnt"] --- Leading zeros count + /// [`lzcnt`] --- Leading zeros count /// - /// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt + /// [`lzcnt`]: https://www.felixcloutier.com/x86/lzcnt struct v3::Lzcnt("`lzcnt`"): "lzcnt" + [] fn uses_lzcnt ), f!( - /// ["movbe"] --- Move data after swapping bytes + /// [`movbe`] --- Move data after swapping bytes /// - /// ["movbe"]: https://www.felixcloutier.com/x86/movbe + /// [`movbe`]: https://www.felixcloutier.com/x86/movbe struct v3::Movbe("`movbe`"): "movbe" + [] fn uses_movbe ), f!( - /// ["pclmulqdq"] --- Packed carry-less multiplication quadword + /// [`pclmulqdq`] --- Packed carry-less multiplication quadword /// - /// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq + /// [`pclmulqdq`]: https://www.felixcloutier.com/x86/pclmulqdq struct crypto::Pclmulqdq("`pclmulqdq`"): "pclmulqdq" + ["sse2"] fn uses_pclmulqdq ), f!( - /// ["popcnt"] --- Count of bits set to 1 + /// [`popcnt`] --- Count of bits set to 1 /// - /// ["popcnt"]: 
https://www.felixcloutier.com/x86/popcnt + /// [`popcnt`]: https://www.felixcloutier.com/x86/popcnt struct v2::Popcnt("`popcnt`"): "popcnt" + [] fn uses_popcnt ), f!( - /// ["rdrand"] --- Read random number + /// [`rdrand`] --- Read random number /// - /// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand + /// [`rdrand`]: https://en.wikipedia.org/wiki/RdRand struct crypto::Rdrand("`rdrand`"): "rdrand" + [] fn uses_rdrand ), f!( - /// ["rdseed"] --- Read random seed + /// [`rdseed`] --- Read random seed /// - /// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand + /// [`rdseed`]: https://en.wikipedia.org/wiki/RdRand struct crypto::Rdseed("`rdseed`"): "rdseed" + [] fn uses_rdseed ), @@ -394,28 +394,28 @@ pub(crate) const X86_FEATURES: &[Feature] = &[ f!( /// [`xsave`] --- Save processor extended states /// - /// ["xsave"]: https://www.felixcloutier.com/x86/xsave + /// [`xsave`]: https://www.felixcloutier.com/x86/xsave struct xsave::Xsave("`xsave`"): "xsave" + [] fn uses_xsave ), f!( - /// ["xsavec"] --- Save processor extended states with compaction + /// [`xsavec`] --- Save processor extended states with compaction /// - /// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec + /// [`xsavec`]: https://www.felixcloutier.com/x86/xsavec struct xsave::Xsavec("`xsavec`"): "xsavec" + [] fn uses_xsavec ), f!( - /// ["xsaveopt"] --- Save processor extended states optimized + /// [`xsaveopt`] --- Save processor extended states optimized /// - /// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt + /// [`xsaveopt`]: https://www.felixcloutier.com/x86/xsaveopt struct xsave::Xsaveopt("`xsaveopt`"): "xsaveopt" + [] fn uses_xsaveopt ), f!( - /// ["xsaves"] --- Save processor extended states supervisor + /// [`xsaves`] --- Save processor extended states supervisor /// - /// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves + /// [`xsaves`]: https://www.felixcloutier.com/x86/xsaves struct xsave::Xsaves("`xsaves`"): "xsaves" + [] fn uses_xsaves ), diff --git 
a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 67ce0e0a4..0638e8e67 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -18,8 +18,8 @@ //! //! # Examples //! -//! At the time of writing, it is not possible to turn scalar values into SIMD vector types safely using -//! only the standard library. +//! At the time of writing, it is not possible to turn scalar values into SIMD +//! vector types safely using only the standard library. //! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. //! //! diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs index 3c74dc60a..0fc70629d 100644 --- a/fearless_simd_core/src/x86/adx/mod.rs +++ b/fearless_simd_core/src/x86/adx/mod.rs @@ -1,4 +1,4 @@ -//! The "adx" target feature. +//! The "adx" target feature, used for arbitrary precision integer addition. #[expect( clippy::module_inception, diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs index f047d0553..3261000ea 100644 --- a/fearless_simd_core/src/x86/avx/mod.rs +++ b/fearless_simd_core/src/x86/avx/mod.rs @@ -1,3 +1,10 @@ +//! Target features related to the Advanced Vector Extensions target features (before AVX-512). +//! +//! These are most commonly used through the [x86-64-v3](crate::x86::V3) microarchitecture level. +//! +//! These support SIMD registers of up to 256 bits. +//! For the 512 bit extension, see [`avx512`](crate::x86::avx512). + #[expect( clippy::module_inception, reason = "The inner module is automatically generated." diff --git a/fearless_simd_core/src/x86/avx512/mod.rs b/fearless_simd_core/src/x86/avx512/mod.rs index 1044da40c..abc53fde5 100644 --- a/fearless_simd_core/src/x86/avx512/mod.rs +++ b/fearless_simd_core/src/x86/avx512/mod.rs @@ -1,3 +1,9 @@ +//! Target features related to the 512-bit extensions to [AVX](crate::x86::avx). +//! +//! 
Many of these are part of the [x86-64-v4](crate::x86::V4) microarchitecture level. +//! +//! These support SIMD registers of up to 512 bits. + mod avx512bf16; pub use avx512bf16::Avx512bf16; diff --git a/fearless_simd_core/src/x86/crypto/mod.rs b/fearless_simd_core/src/x86/crypto/mod.rs index cfb34d3b4..39a3c923d 100644 --- a/fearless_simd_core/src/x86/crypto/mod.rs +++ b/fearless_simd_core/src/x86/crypto/mod.rs @@ -1,3 +1,7 @@ +//! Cryptography-related target features, including hashing, random number generation, and encryption. +//! +//! These are not generally part of the standardised microarchitecture levels. + mod aes; pub use aes::Aes; diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index ca80c1413..4ad6e3765 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["pclmulqdq"] --- Packed carry-less multiplication quadword +/// [`pclmulqdq`] --- Packed carry-less multiplication quadword /// -/// ["pclmulqdq"]: https://www.felixcloutier.com/x86/pclmulqdq +/// [`pclmulqdq`]: https://www.felixcloutier.com/x86/pclmulqdq /// /// A token indicating that the current CPU has the `pclmulqdq` target feature. /// diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 46d05c8c6..38d211bb1 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["rdrand"] --- Read random number +/// [`rdrand`] --- Read random number /// -/// ["rdrand"]: https://en.wikipedia.org/wiki/RdRand +/// [`rdrand`]: https://en.wikipedia.org/wiki/RdRand /// /// A token indicating that the current CPU has the `rdrand` target feature. 
/// diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index a4ba70f3e..08730295a 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["rdseed"] --- Read random seed +/// [`rdseed`] --- Read random seed /// -/// ["rdseed"]: https://en.wikipedia.org/wiki/RdRand +/// [`rdseed`]: https://en.wikipedia.org/wiki/RdRand /// /// A token indicating that the current CPU has the `rdseed` target feature. /// diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index 7869229d5..ed82fed00 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -1,3 +1,10 @@ +//! Discontinued x86-64 target features. +//! +//! That is target features which were present on some CPUs, but later CPU families from the +//! same vendor did not include them. +//! +//! For more information, see + // These will be stabilised in 1.91. // mod tbm; // pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 77ec914b0..206589e84 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -1,14 +1,21 @@ //! Target feature tokens for the x86 and x86-64 CPU families. //! -//! The general compuotation CPU features associated with each [microarchitecture level] can -//! be found in their corresponding modules: +//! The general computation [microarchitecture level]s each have a level in this module. +//! These levels are useful for most users of this crate, as they provide useful categories +//! of supported instructions. //! -//! - [`v1`] for x86-64-v1. -//! - [`v2`] for x86-64-v2. -//! - [`v3`] for x86-64-v3. -//! - [`v4`] for x86-64-v4. +//! - [`V1`] for x86-64-v1. +//! - [`V2`] for x86-64-v2. +//! - [`V3`] for x86-64-v3. +//! 
- [`V4`] for x86-64-v4. //! -//! Tokens for target features which not associated with these levels can be found in this module. +//! We don't yet provide a way to select the best of these for the current CPU, +//! but that is planned. +//! +//! Tokens for individual target features, including those not associated with these levels, +//! can be found in the modules under this one. +//! These are less likely to be directly useful for most users, but are provided for use +//! cases which require them (probably especially those under [`crypto`]). //! //! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index a36ec7099..c1315c39f 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -7,10 +7,10 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["fxsave"] and ["fxrstor"] --- Save and restore x87 FPU, MMX Technology, and SSE State +/// [`fxsave`] and [`fxrstor`] --- Save and restore x87 FPU, MMX Technology, and SSE State /// -/// ["fxsave"]: https://www.felixcloutier.com/x86/fxsave, -/// ["fxrstor"]: https://www.felixcloutier.com/x86/fxrstor, +/// [`fxsave`]: https://www.felixcloutier.com/x86/fxsave, +/// [`fxrstor`]: https://www.felixcloutier.com/x86/fxrstor, /// /// A token indicating that the current CPU has the `fxsr` target feature. /// diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index 0470258b2..03e0320ca 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -1,3 +1,12 @@ +//! Target features related to Streaming SIMD Extensions. +//! +//! These are the predecessors to the [AVX](crate::x86::avx) instructions. +//! +//! These are most commonly used through the [x86-64-v2](crate::x86::V2) microarchitecture level. +//!
Some of these features are also included in [x86-64-v1](crate::x86::V1). +//! +//! These support SIMD registers of up to 128 bits. + mod fxsr; pub use fxsr::Fxsr; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index b0475fad3..f3562cab5 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -108,6 +108,7 @@ impl From for crate::x86::sse::Sse3 { } } + const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 1a6309e0d..02d27c5aa 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -1,6 +1,11 @@ //! Target features enabled in the `x86-64-v1` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! -//! This is the baseline for x86-64 support. +//! This can usually be treated as the baseline for x86-64 support; all of the target features in this module are enabled by +//! default on Rust's x86-64 targets (such as `x86_64-unknown-linux-gnu`). +//! +//! This module also contains [`V1`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. 
pub use crate::x86::sse::Fxsr; pub use crate::x86::sse::Sse; diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index 42415dc41..dc123141e 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["cmpxchg16b"] --- Compares and exchange 16 bytes (128 bits) of data atomically +/// [`cmpxchg16b`] --- Compares and exchange 16 bytes (128 bits) of data atomically /// -/// ["cmpxchg16b"]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b +/// [`cmpxchg16b`]: https://www.felixcloutier.com/x86/cmpxchg8b:cmpxchg16b /// /// A token indicating that the current CPU has the `cmpxchg16b` target feature. /// diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs index 60c6a6516..666414b25 100644 --- a/fearless_simd_core/src/x86/v2/mod.rs +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -1,8 +1,15 @@ +//! Target features enabled in the `x86-64-v2` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V2`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v1` is available. + pub use crate::x86::sse::Sse3; pub use crate::x86::sse::Sse4_1; pub use crate::x86::sse::Sse4_2; pub use crate::x86::sse::SupplementalSse3; -// TODO: Do we actually want to re-export from the previous level here? 
pub use crate::x86::v1::Fxsr; pub use crate::x86::v1::Sse; pub use crate::x86::v1::Sse2; diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index e5666b6a5..0a81347a0 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["popcnt"] --- Count of bits set to 1 +/// [`popcnt`] --- Count of bits set to 1 /// -/// ["popcnt"]: https://www.felixcloutier.com/x86/popcnt +/// [`popcnt`]: https://www.felixcloutier.com/x86/popcnt /// /// A token indicating that the current CPU has the `popcnt` target feature. /// diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index c92a0c9fa..e9c629dc4 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["lzcnt"] --- Leading zeros count +/// [`lzcnt`] --- Leading zeros count /// -/// ["lzcnt"]: https://www.felixcloutier.com/x86/lzcnt +/// [`lzcnt`]: https://www.felixcloutier.com/x86/lzcnt /// /// A token indicating that the current CPU has the `lzcnt` target feature. /// diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs index 8c158a702..1a3ec5ed5 100644 --- a/fearless_simd_core/src/x86/v3/mod.rs +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -1,3 +1,11 @@ +//! Target features enabled in the `x86-64-v3` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V3`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v2` is available. 
+ pub use crate::x86::v1::Fxsr; pub use crate::x86::v1::Sse; pub use crate::x86::v1::Sse2; diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index cea0faf98..f5270f4d6 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["movbe"] --- Move data after swapping bytes +/// [`movbe`] --- Move data after swapping bytes /// -/// ["movbe"]: https://www.felixcloutier.com/x86/movbe +/// [`movbe`]: https://www.felixcloutier.com/x86/movbe /// /// A token indicating that the current CPU has the `movbe` target feature. /// diff --git a/fearless_simd_core/src/x86/v4/mod.rs b/fearless_simd_core/src/x86/v4/mod.rs index e401ecb2f..8946fdfb4 100644 --- a/fearless_simd_core/src/x86/v4/mod.rs +++ b/fearless_simd_core/src/x86/v4/mod.rs @@ -1,3 +1,11 @@ +//! Target features enabled in the `x86-64-v4` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. +//! +//! This module also contains [`V4`], which is a token indicating that this level is available. +//! All tokens in this module can be created [`From`] that token. +//! This is re-exported in the parent module, and in most cases that shorter path should be preferred. +//! +//! This level also implies that `x86-64-v3` is available. + pub use crate::x86::v1::Fxsr; pub use crate::x86::v1::Sse; pub use crate::x86::v1::Sse2; diff --git a/fearless_simd_core/src/x86/xsave/mod.rs b/fearless_simd_core/src/x86/xsave/mod.rs index aee24a743..f75aaa4c7 100644 --- a/fearless_simd_core/src/x86/xsave/mod.rs +++ b/fearless_simd_core/src/x86/xsave/mod.rs @@ -1,3 +1,5 @@ +//! Target features relating to saving processor state, as used to implement operating systems. + #[expect( clippy::module_inception, reason = "The inner module is automatically generated." 
diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 19c8f2f51..1fba1b9ec 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -9,7 +9,7 @@ use core::fmt::Debug; /// [`xsave`] --- Save processor extended states /// -/// ["xsave"]: https://www.felixcloutier.com/x86/xsave +/// [`xsave`]: https://www.felixcloutier.com/x86/xsave /// /// A token indicating that the current CPU has the `xsave` target feature. /// diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index df8033d1f..1fa011864 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["xsavec"] --- Save processor extended states with compaction +/// [`xsavec`] --- Save processor extended states with compaction /// -/// ["xsavec"]: https://www.felixcloutier.com/x86/xsavec +/// [`xsavec`]: https://www.felixcloutier.com/x86/xsavec /// /// A token indicating that the current CPU has the `xsavec` target feature. /// diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 563c26c56..7dab10877 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["xsaveopt"] --- Save processor extended states optimized +/// [`xsaveopt`] --- Save processor extended states optimized /// -/// ["xsaveopt"]: https://www.felixcloutier.com/x86/xsaveopt +/// [`xsaveopt`]: https://www.felixcloutier.com/x86/xsaveopt /// /// A token indicating that the current CPU has the `xsaveopt` target feature. 
/// diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index 8f365d8cd..d26309bc7 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -7,9 +7,9 @@ use crate::{TargetFeatureToken, trampoline}; use core::fmt::Debug; -/// ["xsaves"] --- Save processor extended states supervisor +/// [`xsaves`] --- Save processor extended states supervisor /// -/// ["xsaves"]: https://www.felixcloutier.com/x86/xsaves +/// [`xsaves`]: https://www.felixcloutier.com/x86/xsaves /// /// A token indicating that the current CPU has the `xsaves` target feature. /// From 060c5bc3d30adfaa8c14b6cd6ccece8dea15e952 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:23:43 +0100 Subject: [PATCH 14/19] Add copyright headers --- fearless_simd_core/gen/src/data.rs | 3 +++ fearless_simd_core/gen/src/data/x86.rs | 3 +++ fearless_simd_core/gen/src/main.rs | 20 +++++++++---------- fearless_simd_core/gen/templates/aarch64.rs | 4 ++++ fearless_simd_core/gen/templates/x86.rs | 5 +++++ fearless_simd_core/gen/templates/x86_level.rs | 5 +++++ fearless_simd_core/src/lib.rs | 3 +++ fearless_simd_core/src/trampoline.rs | 3 +++ fearless_simd_core/src/x86/adx/adx.rs | 4 +++- fearless_simd_core/src/x86/adx/mod.rs | 3 +++ fearless_simd_core/src/x86/avx/avx.rs | 4 +++- fearless_simd_core/src/x86/avx/avx2.rs | 4 +++- fearless_simd_core/src/x86/avx/avxifma.rs | 4 +++- .../src/x86/avx/avxneconvert.rs | 4 +++- fearless_simd_core/src/x86/avx/avxvnni.rs | 4 +++- .../src/x86/avx/avxvnniint16.rs | 4 +++- fearless_simd_core/src/x86/avx/avxvnniint8.rs | 4 +++- fearless_simd_core/src/x86/avx/mod.rs | 3 +++ .../src/x86/avx512/avx512bf16.rs | 4 +++- .../src/x86/avx512/avx512bitalg.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512bw.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512cd.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512dq.rs | 4 +++- 
fearless_simd_core/src/x86/avx512/avx512f.rs | 4 +++- .../src/x86/avx512/avx512fp16.rs | 4 +++- .../src/x86/avx512/avx512ifma.rs | 4 +++- .../src/x86/avx512/avx512vbmi.rs | 4 +++- .../src/x86/avx512/avx512vbmi2.rs | 4 +++- fearless_simd_core/src/x86/avx512/avx512vl.rs | 4 +++- .../src/x86/avx512/avx512vnni.rs | 4 +++- .../src/x86/avx512/avx512vp2intersect.rs | 4 +++- .../src/x86/avx512/avx512vpopcntdq.rs | 4 +++- fearless_simd_core/src/x86/avx512/mod.rs | 3 +++ fearless_simd_core/src/x86/crypto/aes.rs | 4 +++- fearless_simd_core/src/x86/crypto/gfni.rs | 4 +++- fearless_simd_core/src/x86/crypto/kl.rs | 4 +++- fearless_simd_core/src/x86/crypto/mod.rs | 3 +++ .../src/x86/crypto/pclmulqdq.rs | 4 +++- fearless_simd_core/src/x86/crypto/rdrand.rs | 4 +++- fearless_simd_core/src/x86/crypto/rdseed.rs | 4 +++- fearless_simd_core/src/x86/crypto/sha.rs | 4 +++- fearless_simd_core/src/x86/crypto/sha512.rs | 4 +++- fearless_simd_core/src/x86/crypto/sm3.rs | 4 +++- fearless_simd_core/src/x86/crypto/sm4.rs | 4 +++- fearless_simd_core/src/x86/crypto/vaes.rs | 4 +++- .../src/x86/crypto/vpclmulqdq.rs | 4 +++- fearless_simd_core/src/x86/crypto/widekl.rs | 4 +++- .../src/x86/discontinued/mod.rs | 3 +++ .../src/x86/discontinued/tbm.rs | 4 +++- fearless_simd_core/src/x86/mod.rs | 3 +++ fearless_simd_core/src/x86/sse/fxsr.rs | 4 +++- fearless_simd_core/src/x86/sse/mod.rs | 3 +++ fearless_simd_core/src/x86/sse/sse.rs | 4 +++- fearless_simd_core/src/x86/sse/sse2.rs | 4 +++- fearless_simd_core/src/x86/sse/sse3.rs | 4 +++- fearless_simd_core/src/x86/sse/sse4_1.rs | 4 +++- fearless_simd_core/src/x86/sse/sse4_2.rs | 4 +++- fearless_simd_core/src/x86/sse/sse4a.rs | 4 +++- fearless_simd_core/src/x86/sse/ssse3.rs | 4 +++- fearless_simd_core/src/x86/v1/level.rs | 3 +++ fearless_simd_core/src/x86/v1/mod.rs | 3 +++ fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 4 +++- fearless_simd_core/src/x86/v2/level.rs | 3 +++ fearless_simd_core/src/x86/v2/mod.rs | 3 +++ fearless_simd_core/src/x86/v2/popcnt.rs | 4 
+++- fearless_simd_core/src/x86/v3/bmi1.rs | 4 +++- fearless_simd_core/src/x86/v3/bmi2.rs | 4 +++- fearless_simd_core/src/x86/v3/f16c.rs | 4 +++- fearless_simd_core/src/x86/v3/fma.rs | 4 +++- fearless_simd_core/src/x86/v3/level.rs | 3 +++ fearless_simd_core/src/x86/v3/lzcnt.rs | 4 +++- fearless_simd_core/src/x86/v3/mod.rs | 3 +++ fearless_simd_core/src/x86/v3/movbe.rs | 4 +++- fearless_simd_core/src/x86/v4/level.rs | 3 +++ fearless_simd_core/src/x86/v4/mod.rs | 3 +++ fearless_simd_core/src/x86/xsave/mod.rs | 3 +++ fearless_simd_core/src/x86/xsave/xsave.rs | 4 +++- fearless_simd_core/src/x86/xsave/xsavec.rs | 4 +++- fearless_simd_core/src/x86/xsave/xsaveopt.rs | 4 +++- fearless_simd_core/src/x86/xsave/xsaves.rs | 4 +++- 80 files changed, 252 insertions(+), 66 deletions(-) diff --git a/fearless_simd_core/gen/src/data.rs b/fearless_simd_core/gen/src/data.rs index fe46d862b..fb5726bb9 100644 --- a/fearless_simd_core/gen/src/data.rs +++ b/fearless_simd_core/gen/src/data.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + mod x86; pub(crate) use x86::{ X86_FEATURES, X86_LEVEL_TEMPLATE, X86_TEMPLATE, X86_V1, X86_V2, X86_V3, X86_V4, diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index e2b1e9f21..1b96b5a84 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + use crate::Feature; macro_rules! 
f { diff --git a/fearless_simd_core/gen/src/main.rs b/fearless_simd_core/gen/src/main.rs index c3ef8b9e2..35d8c233c 100644 --- a/fearless_simd_core/gen/src/main.rs +++ b/fearless_simd_core/gen/src/main.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + mod data; use std::collections::HashSet; @@ -75,13 +78,10 @@ impl From for {type_path} {{ }}\n" ).unwrap(); } - let mut result = format!( - "// This file is automatically generated by `fearless_simd_core_gen`.\n\ - // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ - {template}" - ); + let mut result = String::from(template); // We replace the from impls first, as they use template variables from the rest of this. result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("// {AUTOGEN_COMMENT}\n", AUTOGEN_COMMENT); result = result.replace("{FEATURE_DOCS_NAME}", feature.feature.feature_docs_name); result = result.replace("/// {NEW_DOCS}\n", &new_docs); result = result.replace("{FEATURE_ID}", feature.feature.feature_name); @@ -235,13 +235,10 @@ impl From for {type_path} {{ ).unwrap(); } - let mut result = format!( - "// This file is automatically generated by `fearless_simd_core_gen`.\n\ - // Its template can be found in `fearless_simd_core/gen/templates`.\n\n\ - {X86_LEVEL_TEMPLATE}" - ); + let mut result = String::from(X86_LEVEL_TEMPLATE); // We replace the from impls first, as they use template variables from the rest of this. 
result = result.replace("/*{FROM_IMPLS}*/", &from_impls); + result = result.replace("// {AUTOGEN_COMMENT}", AUTOGEN_COMMENT); result = result.replace("LEVEL_STRUCT_NAME", &level_struct_name); result = result.replace("{LEVEL_ID}", level); result = result.replace("{LEVEL_FEATURE_LCD_CONTENTS}", &lcd_contents); @@ -269,6 +266,9 @@ impl From for {type_path} {{ Ok(()) } +const AUTOGEN_COMMENT: &str = "// This file is automatically generated by `fearless_simd_core_gen`.\n\ + // Its template can be found in `fearless_simd_core/gen/templates`."; + #[derive(Debug)] struct Feature { /// The name of the struct to be generated. diff --git a/fearless_simd_core/gen/templates/aarch64.rs b/fearless_simd_core/gen/templates/aarch64.rs index e69de29bb..e8fbb89aa 100644 --- a/fearless_simd_core/gen/templates/aarch64.rs +++ b/fearless_simd_core/gen/templates/aarch64.rs @@ -0,0 +1,4 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 4c170fc8d..918a05331 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -1,3 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} + //! The {FEATURE_DOCS_NAME} target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs index c02124118..4aabad264 100644 --- a/fearless_simd_core/gen/templates/x86_level.rs +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -1,3 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// {AUTOGEN_COMMENT} + //! The x86-64-{LEVEL_ID} microarchitecture level. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 0638e8e67..7e3efbfb2 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! An abstraction to allow safely running custom `#[target_feature]` functions on stable Rust. //! //! This crate introduces the [`trampoline!`] macro, which allows running code in a diff --git a/fearless_simd_core/src/trampoline.rs b/fearless_simd_core/src/trampoline.rs index 14c73f62d..dde5a6250 100644 --- a/fearless_simd_core/src/trampoline.rs +++ b/fearless_simd_core/src/trampoline.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Support for the safety checks in [`trampoline!`](crate::trampoline!). //! //! Methods to compute whether a each feature in a target feature string (e.g. "sse2,fma") diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs index 10e7b599e..4cd2cd11e 100644 --- a/fearless_simd_core/src/x86/adx/adx.rs +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The ADX target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/adx/mod.rs b/fearless_simd_core/src/x86/adx/mod.rs index 0fc70629d..22a8b231b 100644 --- a/fearless_simd_core/src/x86/adx/mod.rs +++ b/fearless_simd_core/src/x86/adx/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! The "adx" target feature, used for arbitrary precision integer addition. 
#[expect( diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs index 368047220..9e03dd5d8 100644 --- a/fearless_simd_core/src/x86/avx/avx.rs +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs index d3fec9f9c..2d2435797 100644 --- a/fearless_simd_core/src/x86/avx/avx2.rs +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs index c6e1964da..d356ee462 100644 --- a/fearless_simd_core/src/x86/avx/avxifma.rs +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-IFMA target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs index 9f94fc893..41f670166 100644 --- a/fearless_simd_core/src/x86/avx/avxneconvert.rs +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-NE-CONVERT target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs index 5e10181ea..ed631a653 100644 --- a/fearless_simd_core/src/x86/avx/avxvnni.rs +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-VNNI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs index dab234603..270472f2d 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint16.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-VNNI-INT16 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs index ff8876602..907d49235 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint8.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX-VNNI-INT8 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx/mod.rs b/fearless_simd_core/src/x86/avx/mod.rs index 3261000ea..65fe57578 100644 --- a/fearless_simd_core/src/x86/avx/mod.rs +++ b/fearless_simd_core/src/x86/avx/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features related to the Advanced Vector Extensions target features (before AVX-512). //! //! These are most commonly used through the [x86-64-v3](crate::x86::V3) microarchitecture level. diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs index 705252a5a..ec9329682 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bf16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-BF16 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs index 578fd8837..d88e55826 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-BITALG target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs index 29b058298..9a07e5faf 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bw.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-BW target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs index e7ed2389b..cce3c7662 100644 --- a/fearless_simd_core/src/x86/avx512/avx512cd.rs +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-CD target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs index 92b8c87b8..5b444aa1c 100644 --- a/fearless_simd_core/src/x86/avx512/avx512dq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-DQ target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs index bad51a083..6dfa381b7 100644 --- a/fearless_simd_core/src/x86/avx512/avx512f.rs +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-F target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs index b3ba12d53..9a03a7002 100644 --- a/fearless_simd_core/src/x86/avx512/avx512fp16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-FP16 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs index 330e16a8e..1c8866f48 100644 --- a/fearless_simd_core/src/x86/avx512/avx512ifma.rs +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-IFMA target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs index 2811eb146..3e5dde518 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VBMI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs index aa209c2d2..d1187a917 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VBMI2 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs index 4089a4df4..aa6b95b54 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vl.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VL target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs index 9703f9b64..d58569970 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vnni.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VNNI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs index 6a3bdd177..676ec5806 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VP2INTERSECT target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs index ada57947c..c1b39eee4 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AVX512-VPOPCNTDQ target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/avx512/mod.rs b/fearless_simd_core/src/x86/avx512/mod.rs index abc53fde5..5a0ca606c 100644 --- a/fearless_simd_core/src/x86/avx512/mod.rs +++ b/fearless_simd_core/src/x86/avx512/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features related to the 512-bit extensions to [AVX](crate::x86::avx). //! //! Many of these are part of the [x86-64-v4](crate::x86::V4) microarchitecture level. diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs index afe9a2eed..bcc8145de 100644 --- a/fearless_simd_core/src/x86/crypto/aes.rs +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The AES target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs index 3e00a9238..8267392b1 100644 --- a/fearless_simd_core/src/x86/crypto/gfni.rs +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The GFNI target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs index 722eb6db4..c1023e401 100644 --- a/fearless_simd_core/src/x86/crypto/kl.rs +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The KEYLOCKER target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/mod.rs b/fearless_simd_core/src/x86/crypto/mod.rs index 39a3c923d..7154cf890 100644 --- a/fearless_simd_core/src/x86/crypto/mod.rs +++ b/fearless_simd_core/src/x86/crypto/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Cryptogryphy related target features, including hashing, random number generation, and encryption. //! //! These are not generally part of the standardised microarchitecture levels. 
diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index 4ad6e3765..357f8e4f6 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `pclmulqdq` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 38d211bb1..0f3800a00 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `rdrand` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index 08730295a..b7ca6b56a 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `rdseed` target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs index 3479ce3da..3e9e2cabd 100644 --- a/fearless_simd_core/src/x86/crypto/sha.rs +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SHA target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs index 818ef8849..9aff5b820 100644 --- a/fearless_simd_core/src/x86/crypto/sha512.rs +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SHA512 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs index fff96832e..2ad3d2ec8 100644 --- a/fearless_simd_core/src/x86/crypto/sm3.rs +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SM3 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs index 66a48b3d7..14479e316 100644 --- a/fearless_simd_core/src/x86/crypto/sm4.rs +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SM4 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs index 16ddb321e..9e634b776 100644 --- a/fearless_simd_core/src/x86/crypto/vaes.rs +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The VAES target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs index 342af9d31..66e135b29 100644 --- a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The VPCLMULQDQ target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs index a9601bde8..1acfb8d94 100644 --- a/fearless_simd_core/src/x86/crypto/widekl.rs +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The WIDE KEYLOCKER target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index ed82fed00..f1cc4f63f 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Discontinued x86-64 target features. //! //! That is target features which were present on some CPUs, but later CPU families from the diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index 572f756ef..ab875c82f 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The TBM target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 206589e84..951a532c2 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! 
Target feature tokens for the x86 and x86-64 CPU families. //! //! The general computation [microarchitecture level]s each have a level in this module. diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index c1315c39f..d7dfe7910 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `fxsave + fxrstor` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index 03e0320ca..ecd7d84b7 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features related to Streaming SIMD Extensions. //! //! These are the predecessors to the [AVX](crate::x86::avx) instructions. diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs index ad0205776..8f40bb76e 100644 --- a/fearless_simd_core/src/x86/sse/sse.rs +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs index c86ce42fb..089bda1a8 100644 --- a/fearless_simd_core/src/x86/sse/sse2.rs +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs index 27789c9ae..c86b454c2 100644 --- a/fearless_simd_core/src/x86/sse/sse3.rs +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE3 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs index 3f2b75bce..cbce281fd 100644 --- a/fearless_simd_core/src/x86/sse/sse4_1.rs +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE4.1 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs index 0794c2e16..6054559dd 100644 --- a/fearless_simd_core/src/x86/sse/sse4_2.rs +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE4.2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index f3562cab5..d6bf87695 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSE4a target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs index e5a214b9e..45305bf79 100644 --- a/fearless_simd_core/src/x86/sse/ssse3.rs +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The SSSE3 target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs index 86fc61412..889db039a 100644 --- a/fearless_simd_core/src/x86/v1/level.rs +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v1/mod.rs b/fearless_simd_core/src/x86/v1/mod.rs index 02d27c5aa..58d5dcf88 100644 --- a/fearless_simd_core/src/x86/v1/mod.rs +++ b/fearless_simd_core/src/x86/v1/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features enabled in the `x86-64-v1` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This can usually be treated as the baseline for x86-64 support; all of the target features in this module are enabled by diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index dc123141e..1b6d002a1 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `cmpxchg16b` target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs index 5f01a232c..97c66b7b9 100644 --- a/fearless_simd_core/src/x86/v2/level.rs +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v2/mod.rs b/fearless_simd_core/src/x86/v2/mod.rs index 666414b25..a5032b1b1 100644 --- a/fearless_simd_core/src/x86/v2/mod.rs +++ b/fearless_simd_core/src/x86/v2/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features enabled in the `x86-64-v2` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This module also contains [`V2`], which is a token indicating that this level is available. diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index 0a81347a0..d7c71c759 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `popcnt` target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs index 92362dbe9..f887a9dcf 100644 --- a/fearless_simd_core/src/x86/v3/bmi1.rs +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The 1 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs index 55b97dccc..af3eeefe2 100644 --- a/fearless_simd_core/src/x86/v3/bmi2.rs +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The BMI2 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs index aacb30e90..53ad83186 100644 --- a/fearless_simd_core/src/x86/v3/f16c.rs +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The F16C target feature. 
use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs index e051013bd..44d017091 100644 --- a/fearless_simd_core/src/x86/v3/fma.rs +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The FMA3 target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs index ad607a4f4..f24edb4e1 100644 --- a/fearless_simd_core/src/x86/v3/level.rs +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index e9c629dc4..d5283fc69 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `lzcnt` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v3/mod.rs b/fearless_simd_core/src/x86/v3/mod.rs index 1a3ec5ed5..c67583a9f 100644 --- a/fearless_simd_core/src/x86/v3/mod.rs +++ b/fearless_simd_core/src/x86/v3/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! 
Target features enabled in the `x86-64-v3` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This module also contains [`V3`], which is a token indicating that this level is available. diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index f5270f4d6..91ee22bbb 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `movbe` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs index b3acc6f1c..1e6cad459 100644 --- a/fearless_simd_core/src/x86/v4/level.rs +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. diff --git a/fearless_simd_core/src/x86/v4/mod.rs b/fearless_simd_core/src/x86/v4/mod.rs index 8946fdfb4..7f3cd1ee2 100644 --- a/fearless_simd_core/src/x86/v4/mod.rs +++ b/fearless_simd_core/src/x86/v4/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features enabled in the `x86-64-v4` [microarchitecture level](https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels) on x86 and x86-64. //! //! This module also contains [`V4`], which is a token indicating that this level is available. 
diff --git a/fearless_simd_core/src/x86/xsave/mod.rs b/fearless_simd_core/src/x86/xsave/mod.rs index f75aaa4c7..406dd54cd 100644 --- a/fearless_simd_core/src/x86/xsave/mod.rs +++ b/fearless_simd_core/src/x86/xsave/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! Target features relating to saving processor state, as used to implement operating systems. #[expect( diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 1fba1b9ec..1507efb7a 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsave` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index 1fa011864..b84f79126 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsavec` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 7dab10877..3454d9ca2 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. 
// Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsaveopt` target feature. use crate::{TargetFeatureToken, trampoline}; diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index d26309bc7..c5277468d 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -1,6 +1,8 @@ +// Copyright 2025 the Fearless_SIMD Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + // This file is automatically generated by `fearless_simd_core_gen`. // Its template can be found in `fearless_simd_core/gen/templates`. - //! The `xsaves` target feature. use crate::{TargetFeatureToken, trampoline}; From 674227ec968d2471da00e6e3c101baaae9c31128 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Thu, 16 Oct 2025 15:32:31 +0100 Subject: [PATCH 15/19] Add a CI check for the new generator --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5c1919d0d..229965e21 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -192,6 +192,13 @@ jobs: - name: run code generator run: cargo run --bin fearless_simd_gen + - name: run core code generator + run: cargo run --bin fearless_simd_core_gen + + - name: Reformat (Fearless SIMD Core) + # The code generator for Fearless SIMD Core does not do this. 
+ run: cargo fmt -p fearless_simd_core + - name: check for uncommitted changes run: git diff --exit-code From 769e44e4a1f9482bcb6bb63b81da6f2b419101d3 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 17 Oct 2025 13:31:08 +0100 Subject: [PATCH 16/19] Fixup docs on `vectorize` --- fearless_simd_core/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 7e3efbfb2..418b49d93 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -94,9 +94,9 @@ pub unsafe trait TargetFeatureToken: Copy { /// /// `f` must be marked `#[inline(always)]` for this to work. /// - /// Note that this does *not* enable the target features on the Rust side (e.g. for calling). + /// Note that this does *not* enable the target features on the Rust side (i.e. for calling intrinsics safely). /// To do so, you should instead use [`trampoline!`] directly - this is a convenience wrapper around `trampoline` - /// for cases where the dispatch of simd values is handled elsewhere. + /// for cases where either autovectorisation is sufficient, or dispatch to simd intrinsics is handled elsewhere. fn vectorize(self, f: impl FnOnce() -> R) -> R; } From af91c1e90f80d51b242fb3ee7d8aa3a3908299ca Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:56:10 +0100 Subject: [PATCH 17/19] Address various review feedback and other cleanups Rename `trampoline.rs` to `support.rs` The old name conflicted with the name of the macro, leading to it being harder to find the docs of the macro itself. 
Remove unneeded reference Remove entire note on 128 bytes being too small The point it was making was: - Fairly hard to explain - Not necessarily true Add a few more test cases Co-authored-by: Taj Pereira --- fearless_simd_core/src/lib.rs | 81 +++++++++++-------- .../src/{trampoline.rs => support.rs} | 19 ++++- 2 files changed, 64 insertions(+), 36 deletions(-) rename fearless_simd_core/src/{trampoline.rs => support.rs} (91%) diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index 418b49d93..d5bcf004f 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -11,30 +11,32 @@ //! other `#[target_feature]` functions. //! As such, once you have used the [`trampoline!`] macro, you can call any intrinsic in [`core::arch`]. //! -//! This crate also has modules which contain tokens for each Rust target features. -//! These allow safely validating that a target feature is available, and obtaining a token. +//! This crate also has modules which contain a token for each Rust target feature. +//! These each have a `try_new` constructor, which validates whether the corresponding +//! target feature is available, then creates a token if it is. //! These are grouped by architecture: //! //! - [`x86`] contains the tokens for both the x86 and x86-64 targets. -//! It also contains tokens for each x86-64 microarchitecture level, see [`x86::V1`] for details. +//! It also contains a token for each x86-64 microarchitecture level, see [`x86::V1`] for details. //! //! //! # Examples //! -//! At the time of writing, it is not possible to turn scalar values into SIMD +//! At the time of writing, it is not possible to turn scalar values into SIMD //! vector types safely using only the standard library. //! These examples use [bytemuck](https://crates.io/crates/bytemuck) for this. //! -//! +//! Note: These examples are currently pending. +//! //! //! Note that for `aarch64`'s neon, you will want to enable bytemuck's `aarch64_simd` feature. //! 
This is also the case for WASM with `wasm_simd`, but note that this crate //! [isn't needed on WASM][attributes.codegen.target_feature.wasm], as it is safe to -//! call `#[target_features]` on that platform. +//! call `#[target_feature]` functions on that platform. //! //! # Crate Feature Flags //! -//! +//! //! //! # Implementation //! @@ -67,18 +69,18 @@ #[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] pub mod x86; -pub mod trampoline; +pub mod support; #[cfg(feature = "std")] extern crate std; -/// Token that a set of target feature is available. +/// Token which proves that a set of target features is available. /// /// Note that this trait is only meaningful when there are values of this type. /// That is, to enable the target features in `FEATURES`, you *must* have a value /// of this type. /// -/// Values which implement this trait are used in the second argument to [`trampoline!`], +/// Values which implement this trait are used in the first argument to [`trampoline!`], /// which is a safe abstraction over enabling target features. /// /// # Safety @@ -104,43 +106,58 @@ pub unsafe trait TargetFeatureToken: Copy { /// /// This is effectively a stable implementation of the "Struct Target Features" Rust feature, /// which at the time of writing is neither in stable or nightly Rust. -/// This macro can be used to make SIMD dispatch safe in addition to make explicit SIMD, both safely. +/// This macro can be used to make both SIMD dispatch and explicit SIMD safe. /// /// # Reference /// -/// These reference examples presume that you have (values in brackets are the "variables"): +/// These reference examples presume that you have the following. 
+/// The parts of the examples referring to each prerequisite are provided in the brackets: /// -/// - An expression (`token`) of a type (`Token`) which is `TargetFeatureToken` for some target features (`"f1,f2,f3"`); -/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset of those target features (`"f1,f2"`); +/// - An expression (`token`) of a type (`Token`) which implements `TargetFeatureToken` for some target features (`"f1,f2,f3"`); +/// - A function (signature `fn uses_simd(val: [f32; 4]) -> [f32; 4]`) which is safe but enables a subset +/// of those target features (annotated `#[target_feature(enable = "f1,f2")]`); /// - Local values of types corresponding to the argument types (`a` of type `[f32; 4]`) /// /// ```rust,ignore -/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// trampoline!(Token = token => "f1,f2,f3", uses_simd(a: [f32; 4]) -> [f32; 4]) +/// // Or equivalently, as `uses_simd` doesn't require `f3`: +/// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]); /// ``` /// -/// Multiple tokens are also supported by providing them in a sequence in square brackets: +/// Multiple tokens are also supported by providing them in a sequence in square brackets. 
+/// The target feature string must be a subset of the total features made available by the tokens: /// /// ```rust,ignore /// trampoline!([Token = token, Sse = my_sse] => "f1,f2,sse", uses_simd(a: [f32; 4]) -> [f32; 4]) /// ``` /// +/// This is fully validated for safety, so the following example would fail to compile: +/// +/// ```rust,ignore,compile_fail +/// // ERROR: call to function `uses_simd` with `#[target_feature]` is unsafe and requires unsafe block +/// // in order for the call to be safe, the context requires the following additional target feature: f2 +/// trampoline!(Token = token => "f1", uses_simd(a: [f32; 4]) -> [f32; 4]); +/// ``` +/// /// A more advanced syntax is available if you need to use generics. -/// That syntax is explained in comments around the macro's definition, which can be seen above. +/// That syntax is explained in comments around the macro's definition. /// For reference, the implementation used to implement [`vectorize`](TargetFeatureToken::vectorize) for `"sse"` is: /// /// ```rust,ignore /// trampoline!([Sse = self] => "sse", <(R)> fn<(R)>(f: impl FnOnce() -> R = f) -> R { f() }) /// ``` /// -/// There is also support for where clauses after the return type. +/// There is also support for a where clause, after the return type. /// /// # Motivation /// -/// In Fearless SIMD, this macro has two primary use cases: +/// In Fearless SIMD, this macro has three primary use cases: /// -/// 1) To dispatch to a specialised SIMD implementation of a function using target specific -/// instructions which will be more efficient than generic version written using the portable subset. +/// 1) By end-users, to dispatch to a specialised SIMD implementation of a function using target specific +/// instructions, which will be more efficient than a generic version written using the portable subset. /// 2) To implement the portable subset of SIMD operations. 
+/// 3) To implement the `dispatch!` macro and `Simd::vectorize`, which allow SIMD intrinsics to +/// be correctly inlined when writing portable SIMD code. /// /// To expand on use case 1, when using Fearless SIMD you will often be writing functions which are /// instantiated for multiple different SIMD levels (using generics). @@ -176,12 +193,6 @@ pub unsafe trait TargetFeatureToken: Copy { /// trampoline!(Token = token => "f1,f2", uses_simd(a: [f32; 4]) -> [f32; 4]) /// ``` /// -/// Note that a function only operating on 128 bytes is probably too small for checking -/// whether a token exists just for it is worthwhile. -/// However, if you have amorphised the cost of that check between many function calls, -/// the `trampoline!` macro itself compiles down to a function call. -/// (This would be the case when this macro is being used to implement the portable subset of SIMD operations) -/// // TODO: We could write an example for each of ARM, x86, and conditionally compile it in? /// Note that our examples are all ignored as there is no target feature which is available on every platform, /// but we need these docs to compile for users on any platform. @@ -229,12 +240,14 @@ macro_rules! trampoline { // We validate that we actually have a token of each claimed type. let _: $token_type = $token; )+ - // We use a const item rather than a const block to ensure that. - // This does mean that you can no longer use tokens "generically", but it's hard to think of - // cases where that would be usable anyway. + // We use a const item rather than a const block to ensure that the const evaluation happens eagerly, + // ensuring that we don't create functions which look valid but will always fail when actually codegenned. + // This does mean that you can't use tokens "generically", but it's hard to think of cases where that + // would be usable anyway. 
For any case where that is valid, you can always manually create the + // "subsetted" token/tokens beforehand using the `From` impls. const _: () = { // And that the claimed types justify enabling the enabled target features. - $crate::trampoline::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) + $crate::support::is_feature_subset($to_enable, [$(<$token_type as $crate::TargetFeatureToken>::FEATURES),+]) // TODO: Better failure message here (i.e. at least concatting the set of requested features) .unwrap(); }; @@ -319,13 +332,13 @@ mod example_expansion { { sse_mul_f32s(a, b) } } let _: Sse = sse; - const { - crate::trampoline::is_feature_subset( + const _: () = { + crate::support::is_feature_subset( "sse", [::FEATURES], ) .unwrap(); - } + }; #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] let a = a; #[allow(clippy::redundant_locals, reason = "Required for consistency/safety.")] diff --git a/fearless_simd_core/src/trampoline.rs b/fearless_simd_core/src/support.rs similarity index 91% rename from fearless_simd_core/src/trampoline.rs rename to fearless_simd_core/src/support.rs index dde5a6250..96331d161 100644 --- a/fearless_simd_core/src/trampoline.rs +++ b/fearless_simd_core/src/support.rs @@ -45,7 +45,7 @@ impl SubsetResult { } /// Determine whether the features in the target feature string `required` are a subset of the features in `permitted`. -/// See the module level docs [self]. +/// See [the module level docs][self]. /// /// We require static lifetimes as this is primarily internal to the macro. pub const fn is_feature_subset( @@ -62,7 +62,7 @@ pub const fn is_feature_subset( } // `comma_idx` is now the index of the comma, e.g. if the string was "sse,", idx would be 3 // This is the feature we need to validate exists in permitted. 
- let (to_find, remaining_required) = &required_bytes.split_at(comma_idx); + let (to_find, remaining_required) = required_bytes.split_at(comma_idx); if let [comma, rest @ ..] = remaining_required { if *comma != b',' { panic!("Internal failure of expected behaviour."); @@ -199,6 +199,8 @@ mod tests { expect_failure("c,a,b", [&["a", "b"]], "c"); expect_success("a,b", [&["a", "b", "c"]]); expect_failure("a,b", [&["a", "c"]], "b"); + expect_success("a,b,a,a", [&["a", "b", "c"]]); + expect_success("a,b,c", [&["c"], &["b"], &["a"]]); // Check it correctly catches more than single item failures expect_success("a1,a2,a3", [&["a1", "a2", "a3"]]); @@ -220,10 +222,23 @@ mod tests { expect_failure("a1b,a2b", [&["a1b", "a3b"]], "a2b"); } + #[test] + fn incorrect_token() { + // The permitted list here only allows features which are the literal `a1,a2` + // This is completely impossible to pass, but it's worth checking + expect_any_failure("a1,a2", [&["a1,a2"]]); + } + #[test] fn empty_feature() { expect_failure("a,b,", [&["a", "b"]], ""); expect_failure("", [&["a", "b"]], ""); + + // We succeed if the empty target feature is allowed; any case where this is relevant will always + // be validated away by rustc anyway, as there is no target with the target feature `""`. + // As such, there's no harm in being flexible here. 
+ expect_success("", [&[""]]); + expect_success(",,,,,,", [&[""]]); } #[test] From 355009b1b91acd6ce088d984f281454299f1525a Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:59:01 +0100 Subject: [PATCH 18/19] Clean up the stuff about licensing --- fearless_simd_core/gen/src/data/x86.rs | 5 ++--- fearless_simd_core/gen/templates/x86.rs | 1 - fearless_simd_core/gen/templates/x86_level.rs | 1 - fearless_simd_core/src/lib.rs | 1 - fearless_simd_core/src/support.rs | 2 +- fearless_simd_core/src/x86/adx/adx.rs | 1 - fearless_simd_core/src/x86/avx/avx.rs | 1 - fearless_simd_core/src/x86/avx/avx2.rs | 1 - fearless_simd_core/src/x86/avx/avxifma.rs | 1 - fearless_simd_core/src/x86/avx/avxneconvert.rs | 1 - fearless_simd_core/src/x86/avx/avxvnni.rs | 1 - fearless_simd_core/src/x86/avx/avxvnniint16.rs | 1 - fearless_simd_core/src/x86/avx/avxvnniint8.rs | 1 - fearless_simd_core/src/x86/avx512/avx512bf16.rs | 1 - fearless_simd_core/src/x86/avx512/avx512bitalg.rs | 1 - fearless_simd_core/src/x86/avx512/avx512bw.rs | 1 - fearless_simd_core/src/x86/avx512/avx512cd.rs | 1 - fearless_simd_core/src/x86/avx512/avx512dq.rs | 1 - fearless_simd_core/src/x86/avx512/avx512f.rs | 1 - fearless_simd_core/src/x86/avx512/avx512fp16.rs | 1 - fearless_simd_core/src/x86/avx512/avx512ifma.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vbmi.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vbmi2.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vl.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vnni.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs | 1 - fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs | 1 - fearless_simd_core/src/x86/crypto/aes.rs | 1 - fearless_simd_core/src/x86/crypto/gfni.rs | 1 - fearless_simd_core/src/x86/crypto/kl.rs | 1 - fearless_simd_core/src/x86/crypto/pclmulqdq.rs | 1 - fearless_simd_core/src/x86/crypto/rdrand.rs | 1 - fearless_simd_core/src/x86/crypto/rdseed.rs | 1 - 
fearless_simd_core/src/x86/crypto/sha.rs | 1 - fearless_simd_core/src/x86/crypto/sha512.rs | 1 - fearless_simd_core/src/x86/crypto/sm3.rs | 1 - fearless_simd_core/src/x86/crypto/sm4.rs | 1 - fearless_simd_core/src/x86/crypto/vaes.rs | 1 - fearless_simd_core/src/x86/crypto/vpclmulqdq.rs | 1 - fearless_simd_core/src/x86/crypto/widekl.rs | 1 - fearless_simd_core/src/x86/discontinued/tbm.rs | 1 - fearless_simd_core/src/x86/mod.rs | 7 +++++++ fearless_simd_core/src/x86/sse/fxsr.rs | 1 - fearless_simd_core/src/x86/sse/sse.rs | 1 - fearless_simd_core/src/x86/sse/sse2.rs | 1 - fearless_simd_core/src/x86/sse/sse3.rs | 1 - fearless_simd_core/src/x86/sse/sse4_1.rs | 1 - fearless_simd_core/src/x86/sse/sse4_2.rs | 1 - fearless_simd_core/src/x86/sse/sse4a.rs | 1 - fearless_simd_core/src/x86/sse/ssse3.rs | 1 - fearless_simd_core/src/x86/v1/level.rs | 1 - fearless_simd_core/src/x86/v2/cmpxchg16b.rs | 1 - fearless_simd_core/src/x86/v2/level.rs | 1 - fearless_simd_core/src/x86/v2/popcnt.rs | 1 - fearless_simd_core/src/x86/v3/bmi1.rs | 1 - fearless_simd_core/src/x86/v3/bmi2.rs | 1 - fearless_simd_core/src/x86/v3/f16c.rs | 1 - fearless_simd_core/src/x86/v3/fma.rs | 1 - fearless_simd_core/src/x86/v3/level.rs | 1 - fearless_simd_core/src/x86/v3/lzcnt.rs | 1 - fearless_simd_core/src/x86/v3/movbe.rs | 1 - fearless_simd_core/src/x86/v4/level.rs | 1 - fearless_simd_core/src/x86/xsave/xsave.rs | 1 - fearless_simd_core/src/x86/xsave/xsavec.rs | 1 - fearless_simd_core/src/x86/xsave/xsaveopt.rs | 1 - fearless_simd_core/src/x86/xsave/xsaves.rs | 1 - 66 files changed, 10 insertions(+), 67 deletions(-) diff --git a/fearless_simd_core/gen/src/data/x86.rs b/fearless_simd_core/gen/src/data/x86.rs index 1b96b5a84..841dac603 100644 --- a/fearless_simd_core/gen/src/data/x86.rs +++ b/fearless_simd_core/gen/src/data/x86.rs @@ -22,11 +22,10 @@ macro_rules! 
f { pub(crate) const X86_TEMPLATE: &str = include_str!("../../templates/x86.rs"); -// Data taken from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 +// Data adapted from: https://doc.rust-lang.org/reference/attributes/codegen.html#r-attributes.codegen.target_feature.x86 // (specifically, at https://github.com/rust-lang/reference/blob/1d930e1d5a27e114b4d22a50b0b6cd3771b92e31/src/attributes/codegen.md#x86-or-x86_64) -// TODO: Do we need to add their license attribution to our license? // TODO: Check set against https://doc.rust-lang.org/stable/std/macro.is_x86_feature_detected.html -// In particular, we're missing lahfsahf +// In particular, we seem to be missing lahfsahf (not stable?) pub(crate) const X86_FEATURES: &[Feature] = &[ f!( /// [ADX] --- Multi-Precision Add-Carry Instruction Extensions diff --git a/fearless_simd_core/gen/templates/x86.rs b/fearless_simd_core/gen/templates/x86.rs index 918a05331..777dcae38 100644 --- a/fearless_simd_core/gen/templates/x86.rs +++ b/fearless_simd_core/gen/templates/x86.rs @@ -59,7 +59,6 @@ impl FEATURE_STRUCT_NAME { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("{FEATURE_ID}") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/gen/templates/x86_level.rs b/fearless_simd_core/gen/templates/x86_level.rs index 4aabad264..51aaa6d9d 100644 --- a/fearless_simd_core/gen/templates/x86_level.rs +++ b/fearless_simd_core/gen/templates/x86_level.rs @@ -86,7 +86,6 @@ impl LEVEL_STRUCT_NAME { } } } -// TODO: From impls to convert into lower x86 versions. 
/*{FROM_IMPLS}*/ diff --git a/fearless_simd_core/src/lib.rs b/fearless_simd_core/src/lib.rs index d5bcf004f..924bbd0b9 100644 --- a/fearless_simd_core/src/lib.rs +++ b/fearless_simd_core/src/lib.rs @@ -65,7 +65,6 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![no_std] -// TODO: Do we want both an `x86` and `x86_64` module? #[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))] pub mod x86; diff --git a/fearless_simd_core/src/support.rs b/fearless_simd_core/src/support.rs index 96331d161..cf530b3af 100644 --- a/fearless_simd_core/src/support.rs +++ b/fearless_simd_core/src/support.rs @@ -236,7 +236,7 @@ mod tests { // We succeed if the empty target feature is allowed; any case where this is relevant will always // be validated away by rustc anyway, as there is no target with the target feature `""`. - // As such, there's no harm in being flexible here. + // As such, there's no harm in being flexible here. expect_success("", [&[""]]); expect_success(",,,,,,", [&[""]]); } diff --git a/fearless_simd_core/src/x86/adx/adx.rs b/fearless_simd_core/src/x86/adx/adx.rs index 4cd2cd11e..339dbbd0e 100644 --- a/fearless_simd_core/src/x86/adx/adx.rs +++ b/fearless_simd_core/src/x86/adx/adx.rs @@ -61,7 +61,6 @@ impl Adx { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("adx") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avx.rs b/fearless_simd_core/src/x86/avx/avx.rs index 9e03dd5d8..3885ebbcd 100644 --- a/fearless_simd_core/src/x86/avx/avx.rs +++ b/fearless_simd_core/src/x86/avx/avx.rs @@ -61,7 +61,6 @@ impl Avx { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avx2.rs b/fearless_simd_core/src/x86/avx/avx2.rs index 2d2435797..9cfdaab61 100644 --- a/fearless_simd_core/src/x86/avx/avx2.rs +++ b/fearless_simd_core/src/x86/avx/avx2.rs @@ -63,7 +63,6 @@ impl Avx2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxifma.rs b/fearless_simd_core/src/x86/avx/avxifma.rs index d356ee462..870bb9889 100644 --- a/fearless_simd_core/src/x86/avx/avxifma.rs +++ b/fearless_simd_core/src/x86/avx/avxifma.rs @@ -63,7 +63,6 @@ impl Avxifma { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxifma") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxneconvert.rs b/fearless_simd_core/src/x86/avx/avxneconvert.rs index 41f670166..0009d58f2 100644 --- a/fearless_simd_core/src/x86/avx/avxneconvert.rs +++ b/fearless_simd_core/src/x86/avx/avxneconvert.rs @@ -71,7 +71,6 @@ impl Avxneconvert { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxneconvert") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxvnni.rs b/fearless_simd_core/src/x86/avx/avxvnni.rs index ed631a653..e385386ef 100644 --- a/fearless_simd_core/src/x86/avx/avxvnni.rs +++ b/fearless_simd_core/src/x86/avx/avxvnni.rs @@ -63,7 +63,6 @@ impl Avxvnni { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxvnni") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxvnniint16.rs b/fearless_simd_core/src/x86/avx/avxvnniint16.rs index 270472f2d..e213c938d 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint16.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint16.rs @@ -71,7 +71,6 @@ impl Avxvnniint16 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxvnniint16") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx/avxvnniint8.rs b/fearless_simd_core/src/x86/avx/avxvnniint8.rs index 907d49235..7caa7251a 100644 --- a/fearless_simd_core/src/x86/avx/avxvnniint8.rs +++ b/fearless_simd_core/src/x86/avx/avxvnniint8.rs @@ -71,7 +71,6 @@ impl Avxvnniint8 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avxvnniint8") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512bf16.rs b/fearless_simd_core/src/x86/avx512/avx512bf16.rs index ec9329682..62bbaa696 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bf16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bf16.rs @@ -75,7 +75,6 @@ impl Avx512bf16 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512bf16") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs index d88e55826..226cffabe 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bitalg.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bitalg.rs @@ -76,7 +76,6 @@ impl Avx512bitalg { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512bitalg") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512bw.rs b/fearless_simd_core/src/x86/avx512/avx512bw.rs index 9a07e5faf..b5aab6f66 100644 --- a/fearless_simd_core/src/x86/avx512/avx512bw.rs +++ b/fearless_simd_core/src/x86/avx512/avx512bw.rs @@ -64,7 +64,6 @@ impl Avx512bw { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512bw") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512cd.rs b/fearless_simd_core/src/x86/avx512/avx512cd.rs index cce3c7662..39c81d5fc 100644 --- a/fearless_simd_core/src/x86/avx512/avx512cd.rs +++ b/fearless_simd_core/src/x86/avx512/avx512cd.rs @@ -64,7 +64,6 @@ impl Avx512cd { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512cd") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512dq.rs b/fearless_simd_core/src/x86/avx512/avx512dq.rs index 5b444aa1c..abc3c32a7 100644 --- a/fearless_simd_core/src/x86/avx512/avx512dq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512dq.rs @@ -64,7 +64,6 @@ impl Avx512dq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512dq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512f.rs b/fearless_simd_core/src/x86/avx512/avx512f.rs index 6dfa381b7..a25c92552 100644 --- a/fearless_simd_core/src/x86/avx512/avx512f.rs +++ b/fearless_simd_core/src/x86/avx512/avx512f.rs @@ -63,7 +63,6 @@ impl Avx512f { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512f") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512fp16.rs b/fearless_simd_core/src/x86/avx512/avx512fp16.rs index 9a03a7002..b76df9034 100644 --- a/fearless_simd_core/src/x86/avx512/avx512fp16.rs +++ b/fearless_simd_core/src/x86/avx512/avx512fp16.rs @@ -75,7 +75,6 @@ impl Avx512fp16 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512fp16") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512ifma.rs b/fearless_simd_core/src/x86/avx512/avx512ifma.rs index 1c8866f48..dd74a8a29 100644 --- a/fearless_simd_core/src/x86/avx512/avx512ifma.rs +++ b/fearless_simd_core/src/x86/avx512/avx512ifma.rs @@ -74,7 +74,6 @@ impl Avx512ifma { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512ifma") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs index 3e5dde518..38eb6e994 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi.rs @@ -75,7 +75,6 @@ impl Avx512vbmi { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vbmi") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs index d1187a917..b172416d3 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vbmi2.rs @@ -75,7 +75,6 @@ impl Avx512vbmi2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vbmi2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vl.rs b/fearless_simd_core/src/x86/avx512/avx512vl.rs index aa6b95b54..983bc3fc4 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vl.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vl.rs @@ -64,7 +64,6 @@ impl Avx512vl { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vl") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vnni.rs b/fearless_simd_core/src/x86/avx512/avx512vnni.rs index d58569970..f0037c2f0 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vnni.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vnni.rs @@ -74,7 +74,6 @@ impl Avx512vnni { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vnni") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs index 676ec5806..5294dcbeb 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vp2intersect.rs @@ -74,7 +74,6 @@ impl Avx512vp2intersect { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vp2intersect") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs index c1b39eee4..f16c735f4 100644 --- a/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs +++ b/fearless_simd_core/src/x86/avx512/avx512vpopcntdq.rs @@ -74,7 +74,6 @@ impl Avx512vpopcntdq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("avx512vpopcntdq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/aes.rs b/fearless_simd_core/src/x86/crypto/aes.rs index bcc8145de..65a1aeaa3 100644 --- a/fearless_simd_core/src/x86/crypto/aes.rs +++ b/fearless_simd_core/src/x86/crypto/aes.rs @@ -61,7 +61,6 @@ impl Aes { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("aes") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/gfni.rs b/fearless_simd_core/src/x86/crypto/gfni.rs index 8267392b1..f8b12371b 100644 --- a/fearless_simd_core/src/x86/crypto/gfni.rs +++ b/fearless_simd_core/src/x86/crypto/gfni.rs @@ -61,7 +61,6 @@ impl Gfni { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("gfni") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/kl.rs b/fearless_simd_core/src/x86/crypto/kl.rs index c1023e401..45361b810 100644 --- a/fearless_simd_core/src/x86/crypto/kl.rs +++ b/fearless_simd_core/src/x86/crypto/kl.rs @@ -61,7 +61,6 @@ impl Keylocker { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("kl") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs index 357f8e4f6..31d7f60a9 100644 --- a/fearless_simd_core/src/x86/crypto/pclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/pclmulqdq.rs @@ -61,7 +61,6 @@ impl Pclmulqdq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("pclmulqdq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/rdrand.rs b/fearless_simd_core/src/x86/crypto/rdrand.rs index 0f3800a00..c37ab595f 100644 --- a/fearless_simd_core/src/x86/crypto/rdrand.rs +++ b/fearless_simd_core/src/x86/crypto/rdrand.rs @@ -61,7 +61,6 @@ impl Rdrand { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("rdrand") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/rdseed.rs b/fearless_simd_core/src/x86/crypto/rdseed.rs index b7ca6b56a..2f052c0b8 100644 --- a/fearless_simd_core/src/x86/crypto/rdseed.rs +++ b/fearless_simd_core/src/x86/crypto/rdseed.rs @@ -61,7 +61,6 @@ impl Rdseed { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("rdseed") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sha.rs b/fearless_simd_core/src/x86/crypto/sha.rs index 3e9e2cabd..1788683d8 100644 --- a/fearless_simd_core/src/x86/crypto/sha.rs +++ b/fearless_simd_core/src/x86/crypto/sha.rs @@ -61,7 +61,6 @@ impl Sha { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sha") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sha512.rs b/fearless_simd_core/src/x86/crypto/sha512.rs index 9aff5b820..f1116dc2d 100644 --- a/fearless_simd_core/src/x86/crypto/sha512.rs +++ b/fearless_simd_core/src/x86/crypto/sha512.rs @@ -63,7 +63,6 @@ impl Sha512 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sha512") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sm3.rs b/fearless_simd_core/src/x86/crypto/sm3.rs index 2ad3d2ec8..663f34672 100644 --- a/fearless_simd_core/src/x86/crypto/sm3.rs +++ b/fearless_simd_core/src/x86/crypto/sm3.rs @@ -63,7 +63,6 @@ impl Sm3 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sm3") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/sm4.rs b/fearless_simd_core/src/x86/crypto/sm4.rs index 14479e316..af2a6cfcb 100644 --- a/fearless_simd_core/src/x86/crypto/sm4.rs +++ b/fearless_simd_core/src/x86/crypto/sm4.rs @@ -63,7 +63,6 @@ impl Sm4 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sm4") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/vaes.rs b/fearless_simd_core/src/x86/crypto/vaes.rs index 9e634b776..7f47b7440 100644 --- a/fearless_simd_core/src/x86/crypto/vaes.rs +++ b/fearless_simd_core/src/x86/crypto/vaes.rs @@ -63,7 +63,6 @@ impl Vaes { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("vaes") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs index 66e135b29..8467fe8b2 100644 --- a/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs +++ b/fearless_simd_core/src/x86/crypto/vpclmulqdq.rs @@ -71,7 +71,6 @@ impl Vpclmulqdq { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("vpclmulqdq") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/crypto/widekl.rs b/fearless_simd_core/src/x86/crypto/widekl.rs index 1acfb8d94..5c74f3d9a 100644 --- a/fearless_simd_core/src/x86/crypto/widekl.rs +++ b/fearless_simd_core/src/x86/crypto/widekl.rs @@ -61,7 +61,6 @@ impl WideKeylocker { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("widekl") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index ab875c82f..65deed65b 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -61,7 +61,6 @@ impl Tbm { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("tbm") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/mod.rs b/fearless_simd_core/src/x86/mod.rs index 951a532c2..929f9c8c6 100644 --- a/fearless_simd_core/src/x86/mod.rs +++ b/fearless_simd_core/src/x86/mod.rs @@ -20,6 +20,13 @@ //! These are less likely to be directly useful for most users, but are provided for use //! cases which require them (probably especially those under [`crypto`]). //! +//! Both the x86 and x86-64 CPU families are supported in this module as their code is entirely identical, +//! including using the same [`std::is_x86_feature_detected`] macro. +//! Note that this is not the case for `std::arch`; for example, [`core::arch::x86_64::_mm_crc32_u64`] is +//! only available on x86-64. +//! +//! Documentation for features is adapted from the Rust reference. +//! //! [microarchitecture level]: https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels pub mod adx; diff --git a/fearless_simd_core/src/x86/sse/fxsr.rs b/fearless_simd_core/src/x86/sse/fxsr.rs index d7dfe7910..50fa9a792 100644 --- a/fearless_simd_core/src/x86/sse/fxsr.rs +++ b/fearless_simd_core/src/x86/sse/fxsr.rs @@ -62,7 +62,6 @@ impl Fxsr { // TODO: Consider a manual override feature/env var? 
pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("fxsr") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse.rs b/fearless_simd_core/src/x86/sse/sse.rs index 8f40bb76e..5937ece6c 100644 --- a/fearless_simd_core/src/x86/sse/sse.rs +++ b/fearless_simd_core/src/x86/sse/sse.rs @@ -61,7 +61,6 @@ impl Sse { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse2.rs b/fearless_simd_core/src/x86/sse/sse2.rs index 089bda1a8..1d151568a 100644 --- a/fearless_simd_core/src/x86/sse/sse2.rs +++ b/fearless_simd_core/src/x86/sse/sse2.rs @@ -61,7 +61,6 @@ impl Sse2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse3.rs b/fearless_simd_core/src/x86/sse/sse3.rs index c86b454c2..c501bc3a3 100644 --- a/fearless_simd_core/src/x86/sse/sse3.rs +++ b/fearless_simd_core/src/x86/sse/sse3.rs @@ -61,7 +61,6 @@ impl Sse3 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse3") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse4_1.rs b/fearless_simd_core/src/x86/sse/sse4_1.rs index cbce281fd..451a45d3c 100644 --- a/fearless_simd_core/src/x86/sse/sse4_1.rs +++ b/fearless_simd_core/src/x86/sse/sse4_1.rs @@ -61,7 +61,6 @@ impl Sse4_1 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse4.1") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse4_2.rs b/fearless_simd_core/src/x86/sse/sse4_2.rs index 6054559dd..feba99a85 100644 --- a/fearless_simd_core/src/x86/sse/sse4_2.rs +++ b/fearless_simd_core/src/x86/sse/sse4_2.rs @@ -61,7 +61,6 @@ impl Sse4_2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse4.2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index d6bf87695..2bb3e346c 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -61,7 +61,6 @@ impl Sse4a { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("sse4a") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/sse/ssse3.rs b/fearless_simd_core/src/x86/sse/ssse3.rs index 45305bf79..22b35582b 100644 --- a/fearless_simd_core/src/x86/sse/ssse3.rs +++ b/fearless_simd_core/src/x86/sse/ssse3.rs @@ -61,7 +61,6 @@ impl SupplementalSse3 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("ssse3") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v1/level.rs b/fearless_simd_core/src/x86/v1/level.rs index 889db039a..e431a73ba 100644 --- a/fearless_simd_core/src/x86/v1/level.rs +++ b/fearless_simd_core/src/x86/v1/level.rs @@ -92,7 +92,6 @@ impl V1 { } } } -// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v1::Fxsr { fn from(value: V1) -> Self { diff --git a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs index 1b6d002a1..1fd68ceb1 100644 --- a/fearless_simd_core/src/x86/v2/cmpxchg16b.rs +++ b/fearless_simd_core/src/x86/v2/cmpxchg16b.rs @@ -61,7 +61,6 @@ impl Cmpxchg16b { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("cmpxchg16b") { // Safety: The required CPU feature was detected. 
diff --git a/fearless_simd_core/src/x86/v2/level.rs b/fearless_simd_core/src/x86/v2/level.rs index 97c66b7b9..f3455c4e7 100644 --- a/fearless_simd_core/src/x86/v2/level.rs +++ b/fearless_simd_core/src/x86/v2/level.rs @@ -110,7 +110,6 @@ impl V2 { } } } -// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v2::Cmpxchg16b { fn from(value: V2) -> Self { diff --git a/fearless_simd_core/src/x86/v2/popcnt.rs b/fearless_simd_core/src/x86/v2/popcnt.rs index d7c71c759..290660490 100644 --- a/fearless_simd_core/src/x86/v2/popcnt.rs +++ b/fearless_simd_core/src/x86/v2/popcnt.rs @@ -61,7 +61,6 @@ impl Popcnt { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("popcnt") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/bmi1.rs b/fearless_simd_core/src/x86/v3/bmi1.rs index f887a9dcf..875a10025 100644 --- a/fearless_simd_core/src/x86/v3/bmi1.rs +++ b/fearless_simd_core/src/x86/v3/bmi1.rs @@ -61,7 +61,6 @@ impl Bmi1 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("bmi1") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/bmi2.rs b/fearless_simd_core/src/x86/v3/bmi2.rs index af3eeefe2..b83d5d441 100644 --- a/fearless_simd_core/src/x86/v3/bmi2.rs +++ b/fearless_simd_core/src/x86/v3/bmi2.rs @@ -61,7 +61,6 @@ impl Bmi2 { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("bmi2") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/f16c.rs b/fearless_simd_core/src/x86/v3/f16c.rs index 53ad83186..1733f5ab9 100644 --- a/fearless_simd_core/src/x86/v3/f16c.rs +++ b/fearless_simd_core/src/x86/v3/f16c.rs @@ -63,7 +63,6 @@ impl F16c { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("f16c") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/fma.rs b/fearless_simd_core/src/x86/v3/fma.rs index 44d017091..09479f337 100644 --- a/fearless_simd_core/src/x86/v3/fma.rs +++ b/fearless_simd_core/src/x86/v3/fma.rs @@ -63,7 +63,6 @@ impl Fma { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("fma") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/level.rs b/fearless_simd_core/src/x86/v3/level.rs index f24edb4e1..c5d2f8c75 100644 --- a/fearless_simd_core/src/x86/v3/level.rs +++ b/fearless_simd_core/src/x86/v3/level.rs @@ -147,7 +147,6 @@ impl V3 { } } } -// TODO: From impls to convert into lower x86 versions. 
impl From for crate::x86::v3::Avx { fn from(value: V3) -> Self { diff --git a/fearless_simd_core/src/x86/v3/lzcnt.rs b/fearless_simd_core/src/x86/v3/lzcnt.rs index d5283fc69..f81f8df48 100644 --- a/fearless_simd_core/src/x86/v3/lzcnt.rs +++ b/fearless_simd_core/src/x86/v3/lzcnt.rs @@ -61,7 +61,6 @@ impl Lzcnt { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("lzcnt") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v3/movbe.rs b/fearless_simd_core/src/x86/v3/movbe.rs index 91ee22bbb..37df1e1f2 100644 --- a/fearless_simd_core/src/x86/v3/movbe.rs +++ b/fearless_simd_core/src/x86/v3/movbe.rs @@ -61,7 +61,6 @@ impl Movbe { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("movbe") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/v4/level.rs b/fearless_simd_core/src/x86/v4/level.rs index 1e6cad459..db146467d 100644 --- a/fearless_simd_core/src/x86/v4/level.rs +++ b/fearless_simd_core/src/x86/v4/level.rs @@ -158,7 +158,6 @@ impl V4 { } } } -// TODO: From impls to convert into lower x86 versions. impl From for crate::x86::v4::Avx { fn from(value: V4) -> Self { diff --git a/fearless_simd_core/src/x86/xsave/xsave.rs b/fearless_simd_core/src/x86/xsave/xsave.rs index 1507efb7a..d24692c20 100644 --- a/fearless_simd_core/src/x86/xsave/xsave.rs +++ b/fearless_simd_core/src/x86/xsave/xsave.rs @@ -61,7 +61,6 @@ impl Xsave { // TODO: Consider a manual override feature/env var? 
pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsave") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/xsave/xsavec.rs b/fearless_simd_core/src/x86/xsave/xsavec.rs index b84f79126..5b91126b2 100644 --- a/fearless_simd_core/src/x86/xsave/xsavec.rs +++ b/fearless_simd_core/src/x86/xsave/xsavec.rs @@ -61,7 +61,6 @@ impl Xsavec { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsavec") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/xsave/xsaveopt.rs b/fearless_simd_core/src/x86/xsave/xsaveopt.rs index 3454d9ca2..00505619c 100644 --- a/fearless_simd_core/src/x86/xsave/xsaveopt.rs +++ b/fearless_simd_core/src/x86/xsave/xsaveopt.rs @@ -61,7 +61,6 @@ impl Xsaveopt { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. - // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsaveopt") { // Safety: The required CPU feature was detected. diff --git a/fearless_simd_core/src/x86/xsave/xsaves.rs b/fearless_simd_core/src/x86/xsave/xsaves.rs index c5277468d..000054c71 100644 --- a/fearless_simd_core/src/x86/xsave/xsaves.rs +++ b/fearless_simd_core/src/x86/xsave/xsaves.rs @@ -61,7 +61,6 @@ impl Xsaves { // TODO: Consider a manual override feature/env var? pub fn try_new() -> Option { // Feature flag required to make docs compile. 
- // TODO: Extract into a (private) crate::x86::is_x86_feature_detected? #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] if std::arch::is_x86_feature_detected!("xsaves") { // Safety: The required CPU feature was detected. From 3af92fb7af801435e5a090bf50fcbd1dc58224b3 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 3 Nov 2025 11:58:36 +0000 Subject: [PATCH 19/19] Add SSE4a and TBM with MSRV 1.91 --- .clippy.toml | 2 +- .github/workflows/ci.yml | 4 ++-- CHANGELOG.md | 2 +- Cargo.toml | 3 +-- README.md | 2 +- fearless_simd/README.md | 2 +- fearless_simd_core/README.md | 2 +- fearless_simd_core/src/x86/discontinued/mod.rs | 5 ++--- fearless_simd_core/src/x86/discontinued/tbm.rs | 3 +-- fearless_simd_core/src/x86/sse/mod.rs | 5 ++--- fearless_simd_core/src/x86/sse/sse4a.rs | 1 - 11 files changed, 13 insertions(+), 18 deletions(-) diff --git a/.clippy.toml b/.clippy.toml index 898218354..bee24f274 100644 --- a/.clippy.toml +++ b/.clippy.toml @@ -9,4 +9,4 @@ trivial-copy-size-limit = 16 # END LINEBENDER LINT SET -doc-valid-idents = ["ShangMi", ".."] +doc-valid-idents = ["ShangMi", "SSE4a", ".."] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 229965e21..244205696 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,12 +3,12 @@ env: # version like 1.70. Note that we only specify MAJOR.MINOR and not PATCH so that bugfixes still # come automatically. If the version specified here is no longer the latest stable version, # then please feel free to submit a PR that adjusts it along with the potential clippy fixes. - RUST_STABLE_VER: "1.90" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 + RUST_STABLE_VER: "1.91" # In quotes because otherwise (e.g.) 1.70 would be interpreted as 1.7 # The purpose of checking with the minimum supported Rust toolchain is to detect its staleness. 
# If the compilation fails, then the version specified here needs to be bumped up to reality. # Be sure to also update the rust-version property in the workspace Cargo.toml file, # plus all the README.md files of the affected packages. - RUST_MIN_VER: "1.89" + RUST_MIN_VER: "1.91" # List of packages that will be checked with the minimum supported Rust version. # This should be limited to packages that are intended for publishing. RUST_MIN_VER_PKGS: "-p fearless_simd -p fearless_simd_core" diff --git a/CHANGELOG.md b/CHANGELOG.md index db6d6a965..9ad6c367b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ You can find its changes [documented below](#030-2025-10-14). ## [Unreleased] -This release has an [MSRV][] of 1.89. +This release has an [MSRV][] of 1.91. ### Added diff --git a/Cargo.toml b/Cargo.toml index eb1d32831..b9e817fd9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,8 +15,7 @@ license = "Apache-2.0 OR MIT" repository = "https://github.com/linebender/fearless_simd" # Keep in sync with RUST_MIN_VER in .github/workflows/ci.yml, with the relevant README.md files # and with the MSRV in the `Unreleased` section of CHANGELOG.md. -# When increasing past 1.91, also uncomment the `discontinued::tbm` and `sse::sse4a` modules/imports in Fearless SIMD Core. -rust-version = "1.89" +rust-version = "1.91" [workspace.lints] diff --git a/README.md b/README.md index 4749d6f9f..dcccda46d 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ It benefited from conversations with Luca Versari, though he is not responsible ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. 
diff --git a/fearless_simd/README.md b/fearless_simd/README.md index 953e48278..f1f3ed198 100644 --- a/fearless_simd/README.md +++ b/fearless_simd/README.md @@ -139,7 +139,7 @@ At least one of `std` and `libm` is required; `std` overrides `libm`. ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd_core/README.md b/fearless_simd_core/README.md index 45052d288..142fea2c3 100644 --- a/fearless_simd_core/README.md +++ b/fearless_simd_core/README.md @@ -78,7 +78,7 @@ with `#[target_feature]`, and a call to this newly generated function. ## Minimum supported Rust Version (MSRV) -This version of Fearless SIMD has been verified to compile with **Rust 1.89** and later. +This version of Fearless SIMD has been verified to compile with **Rust 1.91** and later. Future versions of Fearless SIMD might increase the Rust version requirement. It will not be treated as a breaking change and as such can even happen with small patch releases. diff --git a/fearless_simd_core/src/x86/discontinued/mod.rs b/fearless_simd_core/src/x86/discontinued/mod.rs index f1cc4f63f..1072d9cad 100644 --- a/fearless_simd_core/src/x86/discontinued/mod.rs +++ b/fearless_simd_core/src/x86/discontinued/mod.rs @@ -8,6 +8,5 @@ //! //! For more information, see -// These will be stabilised in 1.91. 
-// mod tbm; -// pub use tbm::Tbm; +mod tbm; +pub use tbm::Tbm; diff --git a/fearless_simd_core/src/x86/discontinued/tbm.rs b/fearless_simd_core/src/x86/discontinued/tbm.rs index 65deed65b..797afca4b 100644 --- a/fearless_simd_core/src/x86/discontinued/tbm.rs +++ b/fearless_simd_core/src/x86/discontinued/tbm.rs @@ -42,7 +42,7 @@ impl Debug for Tbm { // Safety: This token can only be constructed if you have proof that all the requisite // target feature is enabled. unsafe impl TargetFeatureToken for Tbm { - const FEATURES: &[&str] = &["tbm", ]; + const FEATURES: &[&str] = &["tbm"]; #[inline(always)] fn vectorize(self, f: impl FnOnce() -> R) -> R { @@ -88,7 +88,6 @@ impl Tbm { } } - const _: () = { assert!( core::mem::size_of::() == 0, diff --git a/fearless_simd_core/src/x86/sse/mod.rs b/fearless_simd_core/src/x86/sse/mod.rs index ecd7d84b7..d3d6b5e52 100644 --- a/fearless_simd_core/src/x86/sse/mod.rs +++ b/fearless_simd_core/src/x86/sse/mod.rs @@ -29,9 +29,8 @@ pub use sse3::Sse3; mod ssse3; pub use ssse3::SupplementalSse3; -// These will be stabilised in 1.91. -// mod sse4a; -// pub use sse4a::Sse4a; +mod sse4a; +pub use sse4a::Sse4a; mod sse4_1; pub use sse4_1::Sse4_1; diff --git a/fearless_simd_core/src/x86/sse/sse4a.rs b/fearless_simd_core/src/x86/sse/sse4a.rs index 2bb3e346c..8c8abc80c 100644 --- a/fearless_simd_core/src/x86/sse/sse4a.rs +++ b/fearless_simd_core/src/x86/sse/sse4a.rs @@ -109,7 +109,6 @@ impl From for crate::x86::sse::Sse3 { } } - const _: () = { assert!( core::mem::size_of::() == 0,