Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
git \
ca-certificates \
curl \
tini \
&& rm -rf /var/lib/apt/lists/*

# Non-root user for runtime
Expand Down Expand Up @@ -72,4 +73,11 @@ VOLUME ["/data"]
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
CMD curl -fsS http://127.0.0.1:${GITLAWB_PORT}/health || exit 1

ENTRYPOINT ["gitlawb-node"]
# Run under tini so the node is never PID 1. As PID 1 a process must reap
# reparented orphans itself; the node does not, so git's reparented grandchildren
# (e.g. pack-objects orphaned when a served upload-pack dies on a client
# disconnect) would accumulate as zombies until fork() fails with EAGAIN (#53).
# tini reaps them and forwards SIGTERM/SIGINT to the node, which runs its own
# graceful shutdown. (No `-g`: the node manages its own children, so group-wide
# signalling would only disturb in-flight git helpers during shutdown.)
ENTRYPOINT ["tini", "--", "gitlawb-node"]
1 change: 1 addition & 0 deletions crates/gitlawb-node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ tower-http = { version = "0.6", features = ["cors", "trace", "request-id", "limi
sqlx = { version = "0.8", features = ["postgres", "runtime-tokio-rustls", "chrono", "uuid"] }
clap = { version = "4", features = ["derive", "env"] }
bytes = "1"
libc = "0.2"
cid = { workspace = true }
hex = { workspace = true }
sha2 = { workspace = true }
Expand Down
17 changes: 11 additions & 6 deletions crates/gitlawb-node/src/git/issues.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,18 @@ pub fn create_issue(repo_path: &Path, issue_id: &str, json: &str) -> Result<()>

use std::io::Write;
let mut child = hash_output;
if let Some(stdin) = child.stdin.take() {
let mut stdin = stdin;
stdin
.write_all(json.as_bytes())
.context("failed to write to git hash-object stdin")?;
}
// Write the blob to stdin, but always reap the child afterward even if the
// write fails, so a stdin-write error can't drop the Child unwaited and leak
// a zombie (#53).
let write_result = match child.stdin.take() {
Some(mut stdin) => stdin.write_all(json.as_bytes()),
None => Err(std::io::Error::new(
std::io::ErrorKind::BrokenPipe,
"git hash-object stdin unavailable",
)),
};
let output = child.wait_with_output().context("git hash-object failed")?;
write_result.context("failed to write to git hash-object stdin")?;

if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
Expand Down
212 changes: 200 additions & 12 deletions crates/gitlawb-node/src/git/smart_http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,23 +75,83 @@ pub async fn receive_pack(repo_path: &Path, request_body: Bytes) -> Result<Respo
.body(Body::from(output))?)
}

/// Sends SIGTERM to a child's whole process group on drop, unless disarmed first.
///
/// A served `git upload-pack`/`receive-pack` forks helpers such as `pack-objects`.
/// If the request future is dropped (client disconnect) or returns early, dropping
/// the tokio `Child` does not signal `git`; it lingers until EPIPE, and its
/// `pack-objects` child can reparent to PID 1 and never be reaped — a zombie that
/// accumulates until `fork()` fails with EAGAIN (#53). Spawning the child in its
/// own process group and signalling that group here tears the whole tree down at
/// the source. SIGTERM (not SIGKILL) lets `git` run its cleanup — notably removing
/// `.git/*.lock` files mid-`receive-pack` — before it exits, so an aborted push
/// can't leave a stale lock that blocks the next one. The guard is disarmed once
/// `wait_with_output()` returns, so a request that completed cleanly never signals.
#[cfg(unix)]
struct KillGroupOnDrop {
pgid: Option<i32>,
}

#[cfg(unix)]
impl KillGroupOnDrop {
fn disarm(&mut self) {
self.pgid = None;
}
}

#[cfg(unix)]
impl Drop for KillGroupOnDrop {
fn drop(&mut self) {
if let Some(pgid) = self.pgid {
// SAFETY: kill(2) takes only integer arguments and borrows no Rust
// memory. Signalling a stale group just returns ESRCH, which we ignore.
unsafe {
libc::kill(-pgid, libc::SIGTERM);
}
}
}
}

async fn run_git_service(service: &str, repo_path: &Path, input: Bytes) -> Result<Vec<u8>> {
let mut child = Command::new("git")
let mut command = Command::new("git");
command
.arg(service_to_command(service))
.arg("--stateless-rpc")
.arg(repo_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
.stderr(Stdio::piped());

// Write request body to git's stdin
if let Some(mut stdin) = child.stdin.take() {
stdin.write_all(&input).await?;
}
// Run git in its own process group so the whole tree (git + pack-objects)
// can be signalled together on disconnect rather than orphaning a grandchild.
#[cfg(unix)]
command.process_group(0);

let mut child = command.spawn()?;

// Arm the group-kill guard for the lifetime of the request. With
// process_group(0) the child is its own group leader, so pgid == its pid.
#[cfg(unix)]
let mut group_guard = KillGroupOnDrop {
pgid: child.id().map(|id| id as i32),
};

// Write the request body to git's stdin, but don't early-return on a write
// error: always reap the child first (below), so the guard only ever fires on
// an actual future-drop (client disconnect), never on a pid we just reaped.
let write_result: std::io::Result<()> = match child.stdin.take() {
Some(mut stdin) => stdin.write_all(&input).await,
None => Ok(()),
};

let output = child.wait_with_output().await?;

// Child reaped, so its group is gone: disarm before surfacing any error.
#[cfg(unix)]
group_guard.disarm();

write_result.context("failed to write to git stdin")?;

if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
bail!("{service} failed: {stderr}");
Expand Down Expand Up @@ -152,13 +212,25 @@ pub fn build_filtered_pack(repo_path: &Path, withheld: &HashSet<String>) -> Resu
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
{
// Feed the object ids on stdin, but always reap the child afterward even if
// the write fails or stdin is missing, so an error can't drop the Child
// unwaited and leak a zombie (#53).
let write_result: std::io::Result<()> = {
use std::io::Write as _;
let mut stdin = child.stdin.take().expect("stdin");
stdin.write_all(keep.join("\n").as_bytes())?;
stdin.write_all(b"\n")?;
}
match child.stdin.take() {
Some(mut stdin) => {
let mut data = keep.join("\n").into_bytes();
data.push(b'\n');
stdin.write_all(&data)
}
None => Err(std::io::Error::new(
std::io::ErrorKind::BrokenPipe,
"git pack-objects stdin unavailable",
)),
}
};
let out = child.wait_with_output()?;
write_result.context("failed to write object ids to git pack-objects stdin")?;
if !out.status.success() {
bail!(
"git pack-objects failed: {}",
Expand Down Expand Up @@ -688,4 +760,120 @@ mod tests {

server.abort();
}

// ── #53 regression: served-git process-group teardown ──────────────────
//
// run_git_service runs git in its own process group and SIGTERMs that group
// when the request future is dropped (client disconnect) or returns early, so
// git AND its pack-objects child die together instead of orphaning a zombie.
// These exercise the KillGroupOnDrop guard that wires that up.

#[cfg(unix)]
fn alive(pid: i32) -> bool {
// kill(pid, 0) probes existence: returns 0 while the pid exists, -1 once
// it's gone (ESRCH). Same-uid here, so EPERM never applies.
unsafe { libc::kill(pid, 0) == 0 }
}

#[cfg(unix)]
#[tokio::test]
async fn kill_group_guard_terminates_child_on_drop() {
let mut child = tokio::process::Command::new("sleep")
.arg("300")
.process_group(0)
.spawn()
.unwrap();
let pgid = child.id().map(|id| id as i32);

{
let _guard = KillGroupOnDrop { pgid };
} // guard drops here -> SIGTERM to the group

use std::os::unix::process::ExitStatusExt;
let status = child.wait().await.unwrap();
assert_eq!(
status.signal(),
Some(libc::SIGTERM),
"child must be terminated by SIGTERM via its process group"
);
}

#[cfg(unix)]
#[tokio::test]
async fn kill_group_guard_reaps_grandchild_on_drop() {
// The #53 scenario: git forks pack-objects. A group kill must reach the
// grandchild, not just the direct child. sh (the group leader) backgrounds
// a sleep (standing in for pack-objects) and prints its pid.
use tokio::io::AsyncReadExt;
let mut child = tokio::process::Command::new("sh")
.arg("-c")
.arg("sleep 300 & echo \"$!\"; wait")
.stdout(Stdio::piped())
.process_group(0)
.spawn()
.unwrap();
let pgid = child.id().map(|id| id as i32);

// Read the backgrounded grandchild's pid from the first stdout line.
let mut stdout = child.stdout.take().unwrap();
let mut buf = Vec::new();
loop {
let mut byte = [0u8; 1];
let n = stdout.read(&mut byte).await.unwrap();
if n == 0 || byte[0] == b'\n' {
break;
}
buf.push(byte[0]);
}
let grandchild: i32 = String::from_utf8(buf).unwrap().trim().parse().unwrap();
assert!(alive(grandchild), "grandchild should be running");

{
let _guard = KillGroupOnDrop { pgid };
} // group SIGTERM reaches sh AND the sleep grandchild

let _ = child.wait().await; // reap sh

// The grandchild reparents to init and is reaped; poll until it's gone.
let mut gone = false;
for _ in 0..200 {
if !alive(grandchild) {
gone = true;
break;
}
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}
assert!(
gone,
"grandchild must be terminated by the group signal (#53)"
);
}

#[cfg(unix)]
#[tokio::test]
async fn kill_group_guard_disarmed_does_not_kill() {
// A request that completed cleanly disarms the guard; dropping it must not
// signal anything.
let mut child = tokio::process::Command::new("sleep")
.arg("300")
.process_group(0)
.spawn()
.unwrap();

{
let mut guard = KillGroupOnDrop {
pgid: child.id().map(|id| id as i32),
};
guard.disarm();
} // disarmed -> no kill

assert!(
child.try_wait().unwrap().is_none(),
"disarmed guard must not kill the child"
);

// Clean up the still-running child.
let _ = child.kill().await;
let _ = child.wait().await;
}
}
8 changes: 8 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ services:

node:
build: .
# Reap reparented orphan processes so git's grandchildren (e.g. pack-objects
# left behind when a served clone disconnects) can't accumulate as zombies
# and exhaust the PID table (#53). The image also runs under tini, so this is
# belt-and-suspenders for the quickstart.
init: true
# Cap the process table so a regression fails loudly instead of taking the
# host down. Sits well above normal concurrent git fan-out.
pids_limit: 1024
depends_on:
postgres:
condition: service_healthy
Expand Down
Loading