From 435512db8c21ca75f307f1b7ee6ade09dd8c4ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Wed, 24 Jun 2026 13:39:24 +0200 Subject: [PATCH] [Comgr/Caching/Support] Don't use `mmap` in `localCache`, on NFS When the cache directory is on an NFS, we may hit a SIGBUS signal when there is contention on the cache. This happens typically with MPI jobs running on an NFS mounted $HOME directory. With NFS, the attributes are stored in a cache. The mmap call may succeed, but when we try to access memory returned from this call we may hit a SIGBUS because the file was modified by another process on another machine. If we execute through the regular `read()` path, we also fail with a `Stale file descriptor` error. But at least this path is properly handled and we can continue without using the cache. To achieve this, we make: 1) `getOpenFileImpl()` not use `mmap` if `IsVolatile` is `true`, 2) We make the `localCache` pass `IsVolatile=true` if the cache is mounted on an NFS directory. Notice that (1) may affect other paths appart from the `localCache`. But from what I checked, `IsVolatile` is only `true` for tools like `clangd` on user files. The rest either use `IsVolatile` equal to `false`. This fix is worth upstraming, but I haven't done so yet because I want to explore other solutions before commiting to it. --- llvm/lib/Support/Caching.cpp | 5 ++++- llvm/lib/Support/MemoryBuffer.cpp | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp index 52fd0ba7d681c..05dd6a594a8bc 100644 --- a/llvm/lib/Support/Caching.cpp +++ b/llvm/lib/Support/Caching.cpp @@ -37,6 +37,8 @@ Expected llvm::localCache(const Twine &CacheNameRef, TempFilePrefixRef.toVector(TempFilePrefix); CacheDirectoryPathRef.toVector(CacheDirectoryPath); + bool OnNFS = !sys::fs::is_local(CacheDirectoryPath); + auto Func = [=](unsigned Task, StringRef Key, const Twine &ModuleName) -> Expected { // This choice of file name allows the cache to be pruned (see pruneCache() @@ -52,7 +54,8 @@ Expected llvm::localCache(const Twine &CacheNameRef, ErrorOr> MBOrErr = MemoryBuffer::getOpenFile(*FDOrErr, EntryPath, /*FileSize=*/-1, - /*RequiresNullTerminator=*/false); + /*RequiresNullTerminator=*/false, + /*IsVolatile=*/OnNFS); sys::fs::closeFile(*FDOrErr); if (MBOrErr) { AddBuffer(Task, ModuleName, std::move(*MBOrErr)); diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp index 9c49c143cf01c..470538276c98b 100644 --- a/llvm/lib/Support/MemoryBuffer.cpp +++ b/llvm/lib/Support/MemoryBuffer.cpp @@ -377,6 +377,21 @@ static bool shouldUseMmap(sys::fs::file_t FD, return false; #endif + // Do not use mmap on NFS file systems (IsVolatile is true). + // IsVolatile=true should be used on NFS file systems or when the files are + // expected to change (this happens on clangd when reading user's code). + // Otherwise, accessing the buffer obtained through mmap may result in a + // SIGBUS. Doing regular read() may result in "Stale file handle" errors, + // which is also not great, but at least `getOpenFileImpl` can forward the + // error to the user. In contrast, the SIGBUS happens on the caller's code and + // we cannot prevent it. + // + // FIXME: We could use the `CrashRecoveryContext`, and read one byte from + // every page of the file to catch the SIGBUS in advance. ATM I haven't + // managed to make this work, thus the conservative solution. + if (IsVolatile) + return false; + // mmap may leave the buffer without null terminator if the file size changed // by the time the last page is mapped in, so avoid it if the file size is // likely to change.