From df921d2c4e31c91461a5d9b5a6b96a736f43f28e Mon Sep 17 00:00:00 2001 From: csjones <637026+csjones@users.noreply.github.com> Date: Thu, 30 Apr 2026 13:56:26 -0700 Subject: [PATCH] feat: add gitlink stripping to prevent downstream submodule failures (#19) - Added --strip-gitlinks flag to add and update commands for opt-in removal - Added stripGitlinks field to SubtreeEntry for persistent configuration - Implemented GitOperations.findGitlinks() to detect mode 160000 entries via ls-files - Implemented GitOperations.stripGitlinks() to remove detected gitlinks from index - Added GitOperations.emitGitlinkWarning() for consistent Policy C warnings --- Sources/SubtreeLib/Commands/AddCommand.swift | 24 ++- .../SubtreeLib/Commands/UpdateCommand.swift | 45 ++++- .../Configuration/Models/SubtreeEntry.swift | 17 +- .../Utilities/ConfigFileManager.swift | 3 +- .../SubtreeLib/Utilities/GitOperations.swift | 75 ++++++++ .../StripGitlinksIntegrationTests.swift | 171 ++++++++++++++++++ 6 files changed, 330 insertions(+), 5 deletions(-) create mode 100644 Tests/IntegrationTests/StripGitlinksIntegrationTests.swift diff --git a/Sources/SubtreeLib/Commands/AddCommand.swift b/Sources/SubtreeLib/Commands/AddCommand.swift index b7967e0..0d724d9 100644 --- a/Sources/SubtreeLib/Commands/AddCommand.swift +++ b/Sources/SubtreeLib/Commands/AddCommand.swift @@ -24,6 +24,9 @@ public struct AddCommand: AsyncParsableCommand { @Flag(name: .long, help: "Disable squash mode (preserves full upstream history)") var noSquash: Bool = false + @Flag(name: .long, help: "Strip upstream submodule gitlinks after merge (persists to subtree.yaml)") + var stripGitlinks: Bool = false + public init() {} public func run() async throws { @@ -147,6 +150,24 @@ public struct AddCommand: AsyncParsableCommand { Foundation.exit(1) } + // Detect upstream submodule gitlinks brought in by the merge. If the user opted + // in via --strip-gitlinks, remove them and fold the removal into the atomic + // commit. Otherwise emit a non-destructive warning (Policy C) so the user can + // make an informed choice. + do { + let detected = try await GitOperations.findGitlinks(prefix: finalPrefix) + if !detected.isEmpty { + if stripGitlinks { + let removed = try await GitOperations.stripGitlinks(prefix: finalPrefix) + print("ℹ️ Stripped \(removed.count) upstream submodule gitlink(s) from \(finalPrefix)") + } else { + GitOperations.emitGitlinkWarning(prefix: finalPrefix, gitlinks: detected, command: "add") + } + } + } catch { + print("⚠️ Failed to inspect submodule gitlinks: \(error)") + } + // T040: Capture upstream commit hash from subtree trailer // The split trailer contains the upstream commit hash, which is what we need // for accurate up-to-date checks and stale trailer detection @@ -173,7 +194,8 @@ public struct AddCommand: AsyncParsableCommand { commit: commitHash, tag: refType == "tag" ? finalRef : nil, branch: refType == "branch" ? finalRef : nil, - squash: useSquash + squash: useSquash, + stripGitlinks: stripGitlinks ? true : nil ) // Create new config with appended entry diff --git a/Sources/SubtreeLib/Commands/UpdateCommand.swift b/Sources/SubtreeLib/Commands/UpdateCommand.swift index 4bffccb..d9e9373 100644 --- a/Sources/SubtreeLib/Commands/UpdateCommand.swift +++ b/Sources/SubtreeLib/Commands/UpdateCommand.swift @@ -113,6 +113,9 @@ public struct UpdateCommand: AsyncParsableCommand { @Option(name: .long, help: "Specific ref (tag, branch, or commit) to update to") var ref: String? + @Flag(name: .long, help: "Strip upstream submodule gitlinks after merge (persists to subtree.yaml)") + var stripGitlinks: Bool = false + public init() {} // T039: Mutual exclusion validation @@ -304,6 +307,24 @@ public struct UpdateCommand: AsyncParsableCommand { let headAfterCommit = headAfter.stdout.trimmingCharacters(in: .whitespacesAndNewlines) let subtreePullCreatedCommit = headBeforeCommit != headAfterCommit + // Detect upstream submodule gitlinks. Strip only if the user opted in via + // the CLI flag or `stripGitlinks: true` in subtree.yaml. Otherwise emit a + // non-destructive warning (Policy C). + let effectiveStrip = self.stripGitlinks || (entry.stripGitlinks ?? false) + do { + let detected = try await GitOperations.findGitlinks(prefix: entry.prefix) + if !detected.isEmpty { + if effectiveStrip { + let removed = try await GitOperations.stripGitlinks(prefix: entry.prefix) + print("ℹ️ Stripped \(removed.count) upstream submodule gitlink(s) from \(entry.prefix)") + } else { + GitOperations.emitGitlinkWarning(prefix: entry.prefix, gitlinks: detected, command: "update") + } + } + } catch { + print("⚠️ Failed to inspect submodule gitlinks: \(error)") + } + // T021: Config update (new commit hash AND new tag if changed) after successful update let updatedSubtrees = config.subtrees.map { subtree in if subtree.name == entry.name { @@ -316,7 +337,8 @@ public struct UpdateCommand: AsyncParsableCommand { branch: newBranch, squash: subtree.squash, extracts: subtree.extracts, - extractions: subtree.extractions + extractions: subtree.extractions, + stripGitlinks: self.stripGitlinks ? true : subtree.stripGitlinks ) } return subtree @@ -494,6 +516,24 @@ public struct UpdateCommand: AsyncParsableCommand { let headAfterCommit = headAfter.stdout.trimmingCharacters(in: .whitespacesAndNewlines) let subtreePullCreatedCommit = headBeforeCommit != headAfterCommit + // Detect upstream submodule gitlinks. Strip only if the user opted in via + // the CLI flag or `stripGitlinks: true` in subtree.yaml. Otherwise emit a + // non-destructive warning (Policy C). + let effectiveStripAll = self.stripGitlinks || (entry.stripGitlinks ?? false) + do { + let detected = try await GitOperations.findGitlinks(prefix: entry.prefix) + if !detected.isEmpty { + if effectiveStripAll { + let removed = try await GitOperations.stripGitlinks(prefix: entry.prefix) + print("ℹ️ Stripped \(removed.count) upstream submodule gitlink(s) from \(entry.prefix)") + } else { + GitOperations.emitGitlinkWarning(prefix: entry.prefix, gitlinks: detected, command: "update") + } + } + } catch { + print("⚠️ Failed to inspect submodule gitlinks: \(error)") + } + // Update config (commit AND tag if changed) let updatedSubtrees = config.subtrees.map { subtree in if subtree.name == entry.name { @@ -506,7 +546,8 @@ public struct UpdateCommand: AsyncParsableCommand { branch: newBranch, squash: subtree.squash, extracts: subtree.extracts, - extractions: subtree.extractions + extractions: subtree.extractions, + stripGitlinks: self.stripGitlinks ? true : subtree.stripGitlinks ) } return subtree diff --git a/Sources/SubtreeLib/Configuration/Models/SubtreeEntry.swift b/Sources/SubtreeLib/Configuration/Models/SubtreeEntry.swift index 9e21b90..ff21adf 100644 --- a/Sources/SubtreeLib/Configuration/Models/SubtreeEntry.swift +++ b/Sources/SubtreeLib/Configuration/Models/SubtreeEntry.swift @@ -40,6 +40,19 @@ public struct SubtreeEntry: Codable, Sendable { /// Defines saved file extraction configurations from subtree to project public let extractions: [ExtractionMapping]? + /// Optional flag to strip upstream submodule gitlinks (mode 160000) on add/update. + /// + /// Some upstream repositories include git submodule gitlinks (test/fuzz/interop + /// tooling, etc). When `git subtree add/pull` merges that tree, the gitlinks come + /// along verbatim. Without matching `.gitmodules` entries, downstream tooling + /// (e.g. Swift Package Index's `git submodule update --init`) fails at clone time. + /// + /// When set to `true`, the `add` and `update` commands automatically remove these + /// gitlinks from the index after each merge, folding the removal into the same + /// atomic commit. Default behavior (nil/false) is non-destructive: the CLI only + /// detects and warns when unmapped gitlinks are present. + public let stripGitlinks: Bool? + /// Initialize a subtree entry with required and optional fields public init( name: String, @@ -50,7 +63,8 @@ public struct SubtreeEntry: Codable, Sendable { branch: String? = nil, squash: Bool? = nil, extracts: [ExtractPattern]? = nil, - extractions: [ExtractionMapping]? = nil + extractions: [ExtractionMapping]? = nil, + stripGitlinks: Bool? = nil ) { self.name = name self.remote = remote @@ -61,5 +75,6 @@ public struct SubtreeEntry: Codable, Sendable { self.squash = squash self.extracts = extracts self.extractions = extractions + self.stripGitlinks = stripGitlinks } } diff --git a/Sources/SubtreeLib/Utilities/ConfigFileManager.swift b/Sources/SubtreeLib/Utilities/ConfigFileManager.swift index eae5bcd..465923e 100644 --- a/Sources/SubtreeLib/Utilities/ConfigFileManager.swift +++ b/Sources/SubtreeLib/Utilities/ConfigFileManager.swift @@ -148,7 +148,8 @@ public enum ConfigFileManager { branch: subtree.branch, squash: subtree.squash, extracts: subtree.extracts, - extractions: extractions + extractions: extractions, + stripGitlinks: subtree.stripGitlinks ) // Create new subtrees array with the updated entry diff --git a/Sources/SubtreeLib/Utilities/GitOperations.swift b/Sources/SubtreeLib/Utilities/GitOperations.swift index 131d21a..c090f1f 100644 --- a/Sources/SubtreeLib/Utilities/GitOperations.swift +++ b/Sources/SubtreeLib/Utilities/GitOperations.swift @@ -219,6 +219,81 @@ public enum GitOperations { } } + /// Find submodule gitlink (mode 160000) entries in the index under a prefix. + /// + /// Some upstream repositories include git submodule gitlinks (test/fuzz/interop + /// dependencies, etc). When `git subtree add/pull` merges that tree, the gitlinks + /// come along verbatim. Without matching `.gitmodules` entries, downstream tooling + /// (e.g. Swift Package Index's `git submodule update --init`) fails at clone time: + /// + /// fatal: No url found for submodule path '/' in .gitmodules + /// + /// This helper performs **detection only** — it does not mutate the index. Use + /// `stripGitlinks(prefix:)` to remove detected entries. + /// + /// - Parameter prefix: Repository-relative prefix to scan (e.g. "Vendor/openssl") + /// - Returns: List of gitlink paths under the prefix (empty if none) + /// - Throws: GitError.commandFailed on git invocation failure + public static func findGitlinks(prefix: String) async throws -> [String] { + let lsResult = try await run(arguments: ["ls-files", "-s", "--", prefix]) + guard lsResult.exitCode == 0 else { + throw GitError.commandFailed("git ls-files failed: \(lsResult.stderr)") + } + + // ls-files -s output: " \t" + var gitlinkPaths: [String] = [] + for line in lsResult.stdout.split(separator: "\n", omittingEmptySubsequences: true) { + guard line.hasPrefix("160000 ") else { continue } + guard let tabIndex = line.firstIndex(of: "\t") else { continue } + gitlinkPaths.append(String(line[line.index(after: tabIndex)...])) + } + + return gitlinkPaths + } + + /// Remove submodule gitlink (mode 160000) entries from the index under a prefix. + /// + /// Composes `findGitlinks(prefix:)` with `git rm --cached`. The removals are staged + /// so they can be folded into the same atomic commit that wraps the subtree add/update. + /// + /// - Parameter prefix: Repository-relative prefix to scan (e.g. "Vendor/openssl") + /// - Returns: List of removed gitlink paths (empty if none found) + /// - Throws: GitError.commandFailed on git invocation failure + @discardableResult + public static func stripGitlinks(prefix: String) async throws -> [String] { + let gitlinkPaths = try await findGitlinks(prefix: prefix) + guard !gitlinkPaths.isEmpty else { return [] } + + let rmResult = try await run(arguments: ["rm", "--cached", "-q", "--"] + gitlinkPaths) + guard rmResult.exitCode == 0 else { + throw GitError.commandFailed("git rm --cached failed: \(rmResult.stderr)") + } + + return gitlinkPaths + } + + /// Emit a Policy-C warning when unmapped gitlinks are present but the user has + /// not opted into stripping. Centralized so AddCommand and UpdateCommand stay + /// consistent. + /// + /// - Parameters: + /// - prefix: Repository-relative prefix + /// - gitlinks: Detected gitlink paths (must be non-empty) + /// - command: The CLI subcommand to suggest re-running ("add" or "update") + public static func emitGitlinkWarning(prefix: String, gitlinks: [String], command: String) { + precondition(!gitlinks.isEmpty) + let count = gitlinks.count + let preview = gitlinks.prefix(3).joined(separator: ", ") + let suffix = count > 3 ? ", … (+\(count - 3) more)" : "" + print(""" + ⚠️ Found \(count) upstream submodule gitlink(s) under \(prefix): \(preview)\(suffix) + Without a matching .gitmodules entry, consumers running `git submodule update` + (e.g. Swift Package Index) will fail to clone this repository. + • If unwanted: re-run `subtree \(command)` with `--strip-gitlinks` (persists to subtree.yaml). + • If wanted: commit a .gitmodules file mapping each path to an upstream URL. + """) + } + // T006: Git subtree pull wrapper for update operations /// Execute git subtree pull to update a subtree /// diff --git a/Tests/IntegrationTests/StripGitlinksIntegrationTests.swift b/Tests/IntegrationTests/StripGitlinksIntegrationTests.swift new file mode 100644 index 0000000..a938bcd --- /dev/null +++ b/Tests/IntegrationTests/StripGitlinksIntegrationTests.swift @@ -0,0 +1,171 @@ +import Foundation +import Testing +import Subprocess +import Yams +#if canImport(System) +import System +#else +import SystemPackage +#endif + +/// Integration tests for the gitlink-stripping feature (Policy C: detect-and-warn by default, +/// strip on opt-in via `--strip-gitlinks` or `stripGitlinks: true` in subtree.yaml). +/// +/// These tests construct a local "upstream" repository whose tree contains an unmapped +/// submodule gitlink (mode 160000) — mimicking the real-world swift-openssl/SPI scenario +/// where upstream's `.gitmodules`-bound submodules are dropped on extraction but the +/// gitlink entries remain in the tree. +@Suite("Strip Gitlinks Integration Tests") +struct StripGitlinksIntegrationTests { + + // MARK: - Helpers + + /// Run a git command in a directory and return stdout. Throws on non-zero exit. + @discardableResult + private func runGit(_ arguments: [String], in dir: FilePath) async throws -> String { + let result = try await Subprocess.run( + .name("git"), + arguments: .init(arguments), + workingDirectory: dir, + output: .string(limit: 16384), + error: .string(limit: 16384) + ) + guard case .exited(0) = result.terminationStatus else { + let stderr = result.standardError ?? "" + throw TestError.gitCommandFailed(arguments.joined(separator: " "), stderr) + } + return result.standardOutput ?? "" + } + + /// Create a local upstream repo containing one regular file plus a fake submodule + /// gitlink (mode 160000) at `lib/sub`. Returns the absolute path. + /// + /// The gitlink is injected via `git update-index --add --cacheinfo` with a dummy SHA; + /// no real submodule is required. This reproduces the exact tree shape that + /// real upstreams (e.g., OpenSSL, with `cloudflare-quiche`, `krb5`, etc.) produce. + private func createUpstreamWithGitlink() async throws -> FilePath { + let upstream = FilePath("/tmp/subtree-strip-gitlinks-upstream-\(UUID().uuidString)") + try FileManager.default.createDirectory(atPath: upstream.string, withIntermediateDirectories: true) + + try await runGit(["init", "-b", "main"], in: upstream) + try await runGit(["config", "user.name", "Test User"], in: upstream) + try await runGit(["config", "user.email", "test@example.com"], in: upstream) + try await runGit(["config", "commit.gpgsign", "false"], in: upstream) + + // Add a real file so the tree is not empty + let helloPath = upstream.appending("hello.txt") + try "upstream content\n".write(toFile: helloPath.string, atomically: true, encoding: .utf8) + try await runGit(["add", "hello.txt"], in: upstream) + + // Inject a fake gitlink. The dummy SHA is never resolved — git only validates format. + let dummyCommitSha = "0000000000000000000000000000000000000001" + try await runGit( + ["update-index", "--add", "--cacheinfo", "160000,\(dummyCommitSha),lib/sub"], + in: upstream + ) + + try await runGit(["commit", "-m", "Initial upstream tree with submodule gitlink"], in: upstream) + + return upstream + } + + /// Get the file mode (e.g., "100644", "160000") for a path in the index, or nil if absent. + private func indexMode(of path: String, in dir: FilePath) async throws -> String? { + let output = try await runGit(["ls-files", "-s", "--", path], in: dir) + let trimmed = output.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + // Format: " \t" + let firstField = trimmed.split(separator: " ", maxSplits: 1).first.map(String.init) + return firstField + } + + // MARK: - Tests + + /// Policy C default: when `--strip-gitlinks` is NOT passed and the entry has no + /// `stripGitlinks: true`, the CLI must emit a warning AND leave the gitlink in the index. + /// The user retains full control; no silent data loss. + @Test("Without --strip-gitlinks: gitlink remains in index and warning is emitted") + func warnsButDoesNotStripByDefault() async throws { + let upstream = try await createUpstreamWithGitlink() + defer { try? FileManager.default.removeItem(atPath: upstream.string) } + + let consumer = try await GitRepositoryFixture() + defer { try? consumer.tearDown() } + + let harness = TestHarness() + let initResult = try await harness.run(arguments: ["init"], workingDirectory: consumer.path) + #expect(initResult.exitCode == 0) + + let addResult = try await harness.run( + arguments: ["add", "--remote", "file://\(upstream.string)", "--name", "fake", "--ref", "main"], + workingDirectory: consumer.path + ) + #expect(addResult.exitCode == 0) + + // Warning must mention the gitlink and tell the user how to opt in. + #expect(addResult.stdout.contains("upstream submodule gitlink")) + #expect(addResult.stdout.contains("--strip-gitlinks")) + + // Gitlink must still be present in the consumer's index — Policy C is non-destructive. + let mode = try await indexMode(of: "fake/lib/sub", in: consumer.path) + #expect(mode == "160000", "Expected gitlink to remain in index without opt-in; got mode=\(mode ?? "nil")") + + // subtree.yaml must NOT have stripGitlinks set (no implicit opt-in). + let configPath = consumer.path.appending("subtree.yaml") + let configContent = try String(contentsOfFile: configPath.string, encoding: .utf8) + #expect(!configContent.contains("stripGitlinks")) + } + + /// With `--strip-gitlinks` on `add`: the gitlink is removed from the index, the + /// removal is folded into the atomic add commit, and `stripGitlinks: true` is + /// persisted to subtree.yaml so subsequent updates auto-strip. + @Test("With --strip-gitlinks: gitlink removed and config persisted") + func stripsAndPersistsWhenOptedIn() async throws { + let upstream = try await createUpstreamWithGitlink() + defer { try? FileManager.default.removeItem(atPath: upstream.string) } + + let consumer = try await GitRepositoryFixture() + defer { try? consumer.tearDown() } + + let harness = TestHarness() + let initResult = try await harness.run(arguments: ["init"], workingDirectory: consumer.path) + #expect(initResult.exitCode == 0) + + let addResult = try await harness.run( + arguments: [ + "add", + "--remote", "file://\(upstream.string)", + "--name", "fake", + "--ref", "main", + "--strip-gitlinks" + ], + workingDirectory: consumer.path + ) + #expect(addResult.exitCode == 0, "add failed: \(addResult.stderr)") + + // Stripped indicator must be in stdout. + #expect(addResult.stdout.contains("Stripped 1 upstream submodule gitlink")) + + // Gitlink must be ABSENT from the consumer's index. + let mode = try await indexMode(of: "fake/lib/sub", in: consumer.path) + #expect(mode == nil, "Expected gitlink stripped; still present with mode=\(mode ?? "nil")") + + // Removal must be part of the atomic add commit (no separate "strip gitlinks" commit). + // The HEAD commit (the amended merge commit) must show subtree.yaml as added, + // and its tree must not contain fake/lib/sub. If a separate strip commit had been + // created, HEAD's name-status would show only the deletion of fake/lib/sub, not + // the subtree.yaml addition. + let headStatus = try await runGit(["show", "--name-status", "--format=", "HEAD"], in: consumer.path) + #expect(headStatus.contains("subtree.yaml"), + "Expected HEAD commit to include subtree.yaml; got:\n\(headStatus)") + let headTree = try await runGit(["ls-tree", "-r", "HEAD"], in: consumer.path) + #expect(!headTree.contains("fake/lib/sub"), + "Expected gitlink absent from HEAD tree; got:\n\(headTree)") + + // subtree.yaml must persist `stripGitlinks: true` so future `update` runs auto-strip. + let configPath = consumer.path.appending("subtree.yaml") + let configContent = try String(contentsOfFile: configPath.string, encoding: .utf8) + #expect(configContent.contains("stripGitlinks: true"), + "Expected stripGitlinks: true in subtree.yaml; got:\n\(configContent)") + } +}