From 1d33672bea9a4eca601876ac2e6422c4f3084fc2 Mon Sep 17 00:00:00 2001 From: Yunze Xu Date: Tue, 12 May 2026 21:53:07 +0800 Subject: [PATCH 1/4] fix: _cycles_per_sec might crash due to overflow by reordering of rdtsc --- src/tsc_now.rs | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/tsc_now.rs b/src/tsc_now.rs index c4b9d13..e0a535a 100644 --- a/src/tsc_now.rs +++ b/src/tsc_now.rs @@ -2,6 +2,10 @@ //! This module will be compiled when it's either linux_x86 or linux_x86_64. +#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] +use core::sync::atomic::compiler_fence; +#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] +use core::sync::atomic::Ordering; use std::cell::UnsafeCell; use std::fs::read_to_string; use std::io::ErrorKind; @@ -132,6 +136,7 @@ fn has_invariant_tsc() -> bool { use core::arch::x86_64::__cpuid; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[allow(unused_unsafe)] unsafe { let cpuid_invariant_tsc_bts = 1 << 8; __cpuid(0x80000000).eax >= 0x80000007 @@ -163,7 +168,7 @@ fn _cycles_per_sec() -> (u64, Instant, u64) { let mut last_tsc; let mut old_cycles = 0.0; - loop { + 'outer: loop { let (t1, tsc1) = monotonic_with_tsc(); loop { let (t2, tsc2) = monotonic_with_tsc(); @@ -171,7 +176,14 @@ fn _cycles_per_sec() -> (u64, Instant, u64) { last_tsc = tsc2; let elapsed_nanos = (t2 - t1).as_nanos(); if elapsed_nanos > 10_000_000 { - cycles_per_sec = (tsc2 - tsc1) as f64 * 1_000_000_000.0 / elapsed_nanos as f64; + // Even with fence added in monotonic_with_tsc(), tsc2 < tsc1 is still possible + // if the thread migrates to a different CPU core between samples + // (cores may have slightly different TSC offsets). checked_sub + // prevents overflow; we retry from the outer loop with fresh tsc1. + let Some(delta) = tsc2.checked_sub(tsc1) else { + continue 'outer; + }; + cycles_per_sec = delta as f64 * 1_000_000_000.0 / elapsed_nanos as f64; break; } } @@ -189,7 +201,25 @@ fn _cycles_per_sec() -> (u64, Instant, u64) { /// get interrupted in half way may happen, they aren't guaranteed /// to represent the same instant. fn monotonic_with_tsc() -> (Instant, u64) { - (Instant::now(), tsc()) + // RDTSC is not serializing; LFENCE ensures Instant::now() completes first. + #[cfg(any(target_arch = "x86_64", target_feature = "sse2"))] + { + #[cfg(target_arch = "x86")] + use core::arch::x86::_mm_lfence; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::_mm_lfence; + let t = Instant::now(); + unsafe { + _mm_lfence(); + } + (t, tsc()) + } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + { + let t = Instant::now(); + compiler_fence(Ordering::SeqCst); + (t, tsc()) + } } #[inline] From 57a2c16e65990cbcd700e6d3768bc4014ba862e9 Mon Sep 17 00:00:00 2001 From: Yunze Xu Date: Wed, 13 May 2026 11:30:01 +0800 Subject: [PATCH 2/4] chore: move the use into the cfg block --- src/tsc_now.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tsc_now.rs b/src/tsc_now.rs index e0a535a..6ac81ac 100644 --- a/src/tsc_now.rs +++ b/src/tsc_now.rs @@ -2,10 +2,6 @@ //! This module will be compiled when it's either linux_x86 or linux_x86_64. -#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] -use core::sync::atomic::compiler_fence; -#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] -use core::sync::atomic::Ordering; use std::cell::UnsafeCell; use std::fs::read_to_string; use std::io::ErrorKind; @@ -216,6 +212,8 @@ fn monotonic_with_tsc() -> (Instant, u64) { } #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { + use std::sync::atomic::compiler_fence; + use std::sync::atomic::Ordering; let t = Instant::now(); compiler_fence(Ordering::SeqCst); (t, tsc()) From 79e8ebea31de3eab471ef08120c4cb5d2058e0d6 Mon Sep 17 00:00:00 2001 From: Yunze Xu Date: Wed, 13 May 2026 11:47:37 +0800 Subject: [PATCH 3/4] remove unsafe instead of allow unused unsafe --- src/tsc_now.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tsc_now.rs b/src/tsc_now.rs index 6ac81ac..d2eba7b 100644 --- a/src/tsc_now.rs +++ b/src/tsc_now.rs @@ -132,8 +132,7 @@ fn has_invariant_tsc() -> bool { use core::arch::x86_64::__cpuid; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] - #[allow(unused_unsafe)] - unsafe { + { let cpuid_invariant_tsc_bts = 1 << 8; __cpuid(0x80000000).eax >= 0x80000007 && __cpuid(0x80000007).edx & cpuid_invariant_tsc_bts != 0 From 62c8a63670a15cf9e813af1f4fa54e099ba48120 Mon Sep 17 00:00:00 2001 From: tison Date: Wed, 13 May 2026 12:07:25 +0800 Subject: [PATCH 4/4] fixup Signed-off-by: tison --- src/tsc_now.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/tsc_now.rs b/src/tsc_now.rs index d2eba7b..892e168 100644 --- a/src/tsc_now.rs +++ b/src/tsc_now.rs @@ -196,27 +196,23 @@ fn _cycles_per_sec() -> (u64, Instant, u64) { /// get interrupted in half way may happen, they aren't guaranteed /// to represent the same instant. fn monotonic_with_tsc() -> (Instant, u64) { + let t = Instant::now(); // RDTSC is not serializing; LFENCE ensures Instant::now() completes first. - #[cfg(any(target_arch = "x86_64", target_feature = "sse2"))] + #[cfg(target_feature = "sse2")] { #[cfg(target_arch = "x86")] - use core::arch::x86::_mm_lfence; + use std::arch::x86::_mm_lfence; #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::_mm_lfence; - let t = Instant::now(); - unsafe { - _mm_lfence(); - } - (t, tsc()) + use std::arch::x86_64::_mm_lfence; + unsafe { _mm_lfence() }; } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + #[cfg(not(target_feature = "sse2"))] { use std::sync::atomic::compiler_fence; use std::sync::atomic::Ordering; - let t = Instant::now(); compiler_fence(Ordering::SeqCst); - (t, tsc()) } + (t, tsc()) } #[inline]