Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ internal static partial class Kernel32
// https://learn.microsoft.com/windows/win32/api/winioctl/ni-winioctl-fsctl_get_reparse_point
internal const int FSCTL_GET_REPARSE_POINT = 0x000900a8;

// Control code passed to DeviceIoControl to mark a file as sparse, so that ranges which are
// never written can remain unallocated holes on file systems that support sparse files.
// https://learn.microsoft.com/windows/win32/api/winioctl/ni-winioctl-fsctl_set_sparse
internal const int FSCTL_SET_SPARSE = 0x000900C4;

// https://learn.microsoft.com/windows-hardware/drivers/ddi/ntddstor/ni-ntddstor-ioctl_storage_read_capacity
internal const int IOCTL_STORAGE_READ_CAPACITY = 0x002D5140;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
<Compile Include="$(CommonPath)Interop\Windows\Interop.Libraries.cs" Link="Common\Interop\Windows\Interop.Libraries.cs" />
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.BY_HANDLE_FILE_INFORMATION.cs" Link="Common\Interop\Windows\Kernel32\Interop.BY_HANDLE_FILE_INFORMATION.cs" />
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.CreateFile.cs" Link="Common\Interop\Windows\Kernel32\Interop.CreateFile.cs" />
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.DeviceIoControl.cs" Link="Common\Interop\Windows\Kernel32\Interop.DeviceIoControl.cs" />
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.FILE_TIME.cs" Link="Common\Interop\Windows\Kernel32\Interop.FILE_TIME.cs" />
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.FileOperations.cs" Link="Common\Interop\Windows\Kernel32\Interop.FileOperations.cs" />
<Compile Include="$(CommonPath)Interop\Windows\Kernel32\Interop.GenericOperations.cs" Link="Common\Interop\Windows\Kernel32\Interop.GenericOperations.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,108 @@ private async ValueTask<int> ReadAsyncCore(Memory<byte> buffer, CancellationToke
// Exposes the underlying raw stream for callers that need to access the condensed data.
internal Stream BaseStream => _rawStream;

// Copies only the populated segments of this sparse entry to the given destination FileStream,
// seeking over holes so they remain unwritten. On file systems that support sparse files (most
// modern Unix file systems and NTFS when the file has been marked sparse via FSCTL_SET_SPARSE),
// the holes will not consume disk space. On file systems without sparse support, the OS will
// zero-fill the holes when SetLength is called below, producing an equivalent on-disk result
// to a plain CopyTo (just without the up-front PreallocationSize reservation).
//
// The destination must be writable and seekable. The destination's final length will equal
// the entry's real (expanded) size.
internal void CopyPopulatedDataTo(FileStream destination)
{
    ThrowIfDisposed();
    EnsureInitialized();
    Debug.Assert(_segments is not null && _packedStartOffsets is not null);

    // A pooled scratch buffer is reused for every chunk of every segment.
    byte[] rented = ArrayPool<byte>.Shared.Rent(81920);
    try
    {
        for (int index = 0; index < _segments.Length; index++)
        {
            (long start, long length) = _segments[index];
            if (length != 0)
            {
                // Jump straight to where this segment lives in the expanded file; the gap
                // between the previous write and this position is never written.
                destination.Position = start;

                for (long copied = 0; copied < length;)
                {
                    long packedOffset = _packedStartOffsets[index] + copied;
                    int chunkSize = (int)Math.Min(length - copied, rented.Length);

                    int count = ReadFromPackedData(rented.AsSpan(0, chunkSize), packedOffset);
                    if (count == 0)
                    {
                        // The packed data ended before the segment was fully copied.
                        throw new EndOfStreamException();
                    }

                    destination.Write(rented, 0, count);
                    copied += count;
                }
            }
        }
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(rented);
    }

    // Grow the destination to the real size when the last write stopped short of it,
    // so that a trailing hole is still reflected in the final file length.
    bool needsExtension = destination.Length < _realSize;
    if (needsExtension)
    {
        destination.SetLength(_realSize);
    }

    // The whole entry has been consumed; reflect that in the virtual position.
    _virtualPosition = _realSize;
}

// Async counterpart to CopyPopulatedDataTo.
internal async ValueTask CopyPopulatedDataToAsync(FileStream destination, CancellationToken cancellationToken)
{
    ThrowIfDisposed();
    await EnsureInitializedAsync(cancellationToken).ConfigureAwait(false);
    Debug.Assert(_segments is not null && _packedStartOffsets is not null);

    // A pooled scratch buffer is reused for every chunk of every segment.
    byte[] rented = ArrayPool<byte>.Shared.Rent(81920);
    try
    {
        for (int index = 0; index < _segments.Length; index++)
        {
            (long start, long length) = _segments[index];
            if (length != 0)
            {
                // Jump straight to where this segment lives in the expanded file; the gap
                // between the previous write and this position is never written.
                destination.Position = start;

                for (long copied = 0; copied < length;)
                {
                    long packedOffset = _packedStartOffsets[index] + copied;
                    Memory<byte> chunk = rented.AsMemory(0, (int)Math.Min(length - copied, rented.Length));

                    int count = await ReadFromPackedDataAsync(chunk, packedOffset, cancellationToken).ConfigureAwait(false);
                    if (count == 0)
                    {
                        // The packed data ended before the segment was fully copied.
                        throw new EndOfStreamException();
                    }

                    await destination.WriteAsync(chunk.Slice(0, count), cancellationToken).ConfigureAwait(false);
                    copied += count;
                }
            }
        }
    }
    finally
    {
        ArrayPool<byte>.Shared.Return(rented);
    }

    // Grow the destination to the real size when the last write stopped short of it,
    // so that a trailing hole is still reflected in the final file length.
    bool needsExtension = destination.Length < _realSize;
    if (needsExtension)
    {
        destination.SetLength(_realSize);
    }

    // The whole entry has been consumed; reflect that in the virtual position.
    _virtualPosition = _realSize;
}

// Reads from the packed data at the given packedOffset.
// After EnsureInitialized, the raw stream is positioned at _dataStart and
// _nextPackedOffset tracks how far into the packed data we've read.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,16 @@ private void ExtractAsHardLink(string targetFilePath, string hardLinkFilePath)
Debug.Assert(!string.IsNullOrEmpty(hardLinkFilePath));
File.CreateHardLink(hardLinkFilePath, targetFilePath);
}

// On Unix-like systems no explicit step is needed to make a file sparse: the kernel
// creates a hole whenever a write is preceded by a seek past the previous end. Most
// modern file systems (ext4, btrfs, xfs, APFS, ...) support sparse files; on those that
// do not, the SetLength call performed after the segment copy will still produce a
// correct (but fully allocated) result.
// IDE0060 (unused parameter) is suppressed because 'fs' is deliberately unused in this variant.
#pragma warning disable IDE0060
private static void TryMarkFileSparse(FileStream fs)
{
    // Intentionally empty: this variant performs no work on the file.
}
#pragma warning restore IDE0060
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,24 @@ private void ExtractAsHardLink(string targetFilePath, string hardLinkFilePath)
Debug.Assert(!string.IsNullOrEmpty(hardLinkFilePath));
File.CreateHardLink(hardLinkFilePath, targetFilePath);
}

// Best-effort attempt to mark the file as sparse on Windows so subsequent unwritten ranges
// remain real holes (unallocated extents) rather than being zero-filled on disk. The call
// is silently ignored if the underlying file system does not support sparse files
// (e.g. FAT/exFAT), in which case the extraction still produces correct content but the
// file occupies its full logical size on disk.
private static unsafe void TryMarkFileSparse(FileStream fs)
{
    // Issue FSCTL_SET_SPARSE against the destination handle. No input or output buffer is
    // supplied (null / 0 for both), and the call is synchronous (no OVERLAPPED structure).
    // This is best-effort: the outcome of the call is deliberately not inspected, so a
    // failure to mark the file sparse never prevents the extraction from proceeding.
    Interop.Kernel32.DeviceIoControl(
        fs.SafeFileHandle,
        Interop.Kernel32.FSCTL_SET_SPARSE,
        null,
        0,
        null,
        0,
        out _,
        IntPtr.Zero);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -569,8 +569,19 @@ private void ExtractAsRegularFile(string destinationFileName)
// Rely on FileStream's ctor for further checking destinationFileName parameter
using (FileStream fs = new FileStream(destinationFileName, CreateFileStreamOptions(isAsync: false)))
{
// Important: The DataStream will be written from its current position
DataStream?.CopyTo(fs);
if (_header._gnuSparseDataStream is GnuSparseStream { Position: 0 } sparseStream)
{
// Sparse-aware extraction: write only the populated segments, seeking over holes
// so file systems can leave them as actual sparse holes (NTFS once marked sparse;
// most Unix file systems do this automatically).
TryMarkFileSparse(fs);
sparseStream.CopyPopulatedDataTo(fs);
}
else
{
// Important: The DataStream will be written from its current position
DataStream?.CopyTo(fs);
}
}

AttemptSetLastWriteTime(destinationFileName, ModificationTime);
Expand All @@ -588,7 +599,12 @@ private async Task ExtractAsRegularFileAsync(string destinationFileName, Cancell
FileStream fs = new FileStream(destinationFileName, CreateFileStreamOptions(isAsync: true));
await using (fs.ConfigureAwait(false))
{
if (DataStream != null)
if (_header._gnuSparseDataStream is GnuSparseStream { Position: 0 } sparseStream)
{
TryMarkFileSparse(fs);
await sparseStream.CopyPopulatedDataToAsync(fs, cancellationToken).ConfigureAwait(false);
}
else if (DataStream != null)
{
// Important: The DataStream will be written from its current position
await DataStream.CopyToAsync(fs, cancellationToken).ConfigureAwait(false);
Expand Down Expand Up @@ -617,7 +633,11 @@ private FileStreamOptions CreateFileStreamOptions(bool isAsync)
Access = FileAccess.Write,
Mode = FileMode.CreateNew,
Share = FileShare.None,
PreallocationSize = Length,
// Skip preallocation for GNU sparse entries: the entry's Length is the expanded
// (real) size, while the archive only contains the much smaller packed data.
// Preallocating to the expanded size would reserve disk space that bears no
// relation to the archive contents and can fail surprisingly on small volumes.
PreallocationSize = _header._gnuSparseDataStream is null ? Length : 0,
Options = isAsync ? FileOptions.Asynchronous : FileOptions.None
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,62 @@ public void CopySparseEntryToNewArchive_PreservesExpandedContent(bool copyData,
Assert.Null(reader2.GetNextEntry());
}

[Theory]
[InlineData(false)]
[InlineData(true)]
public async Task ExtractToFile_SparseEntry_ExpandsCorrectly(bool useAsync)
{
    // Extracting a GNU sparse entry to disk must yield the fully expanded content:
    // the data segments at their real offsets with zero-filled holes in between.
    // This also covers TarEntry.CreateFileStreamOptions, which intentionally skips
    // preallocation for sparse entries so that disk space equal to the expanded
    // (real) size is not reserved — see https://github.com/dotnet/runtime/issues/128283.
    const string EntryName = "sparse-extract.bin";
    const long ExpandedSize = 4096;
    var dataSegments = new[] { (256L, 256L), (1024L, 256L), (3072L, 256L) };

    var (archiveStream, _) = BuildSparseArchive(EntryName, ExpandedSize, dataSegments);
    archiveStream.Position = 0;

    string outputPath = GetTestFilePath();

    using (var tarReader = new TarReader(archiveStream))
    {
        TarEntry? sparseEntry;
        if (useAsync)
        {
            sparseEntry = await tarReader.GetNextEntryAsync();
        }
        else
        {
            sparseEntry = tarReader.GetNextEntry();
        }

        Assert.NotNull(sparseEntry);
        Assert.Equal(ExpandedSize, sparseEntry.Length);

        if (useAsync)
        {
            await sparseEntry.ExtractToFileAsync(outputPath, overwrite: false);
        }
        else
        {
            sparseEntry.ExtractToFile(outputPath, overwrite: false);
        }
    }

    byte[] onDisk = File.ReadAllBytes(outputPath);
    Assert.Equal(ExpandedSize, onDisk.Length);
    VerifyExpandedContent(onDisk, ExpandedSize, dataSegments);

    // The remaining check only applies to Windows volumes formatted as NTFS.
    if (!OperatingSystem.IsWindows())
    {
        return;
    }

    string? volumeRoot = Path.GetPathRoot(outputPath);
    Assert.NotNull(volumeRoot);

    bool isNtfs = new DriveInfo(volumeRoot!).DriveFormat.Equals("NTFS", StringComparison.OrdinalIgnoreCase);
    if (isNtfs)
    {
        // On Windows the sparse-aware extraction path marks the destination file with
        // FSCTL_SET_SPARSE. NTFS reflects this through the FileAttributes.SparseFile bit.
        FileAttributes attributes = File.GetAttributes(outputPath);
        Assert.True((attributes & FileAttributes.SparseFile) != 0,
            "Expected sparse extraction to mark the destination file as sparse on Windows/NTFS.");
    }
}

[Theory]
[InlineData(false)]
[InlineData(true)]
Expand Down
Loading