Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,19 +230,35 @@ Visit [Tutorial: Create a simple pipeline (S3 bucket)](https://docs.aws.amazon.c

To use LFS you need to first install git-lfs. You can refer to the [official documentation](https://git-lfs.com/) on how to do this on your system.

Next, you need to enable the S3 integration by running the following command in the repo folder:
Next, enable the S3 integration in the repo. There are two install modes:

```bash
# Per-remote (recommended; required when other LFS remotes coexist)
git-lfs-s3 install --remote <remote-name>

# Unscoped (back-compat; applies the agent to ALL remotes in the repo)
git-lfs-s3 install
```

which is a shortcut for:
`--remote` writes a per-remote scoped configuration so `git-lfs-s3` only fires for that one remote — letting an S3 remote coexist with non-S3 LFS remotes (e.g. GitHub, GitLab) without breaking their LFS push/pull. Use it whenever the repo has more than one remote.

The bare `git-lfs-s3 install` form sets `lfs.standalonetransferagent` unscoped — that is, for every remote in the repo (it does not touch git's `--global` configuration) — and is short for:

```bash
git config --add lfs.customtransfer.git-lfs-s3.path git-lfs-s3
git config --add lfs.standalonetransferagent git-lfs-s3
```

`git-lfs-s3 install --remote <name>` instead writes:

```bash
git config remote.<name>.lfsurl https://lfs-alias.git-remote-s3.test/<bucket>/<prefix>
git config lfs.<that-url>.standalonetransferagent git-lfs-s3
git config lfs.customtransfer.git-lfs-s3.path git-lfs-s3
```

The `lfs-alias.git-remote-s3.test` host is a synthetic, never-contacted match key (the `.test` TLD is reserved by RFC 6761 for non-resolvable use). It exists only because git-lfs's URL parser does not natively understand `s3://` URLs and would otherwise fall back to SSH-style endpoint discovery; setting `remote.<name>.lfsurl` short-circuits that path and gives the scoped agent lookup a stable URL to match against.

### Creating the repo and pushing

Let's assume we want to store a TIFF file in LFS.
Expand All @@ -252,13 +268,13 @@ mkdir lfs-repo
cd lfs-repo
git init
git lfs install
git-lfs-s3 install
git remote add origin s3://my-git-bucket/lfs-repo
git-lfs-s3 install --remote origin
git lfs track "*.tiff"
git add .gitattributes
<put file.tiff in the repo>
git add file.tiff
git commit -a -m "my first tiff file"
git remote add origin s3://my-git-bucket/lfs-repo
git push --set-upstream origin main
```

Expand All @@ -278,7 +294,7 @@ To fix:

```bash
cd lfs-repo-clone
git-lfs-s3 install
git-lfs-s3 install --remote origin
git reset --hard main
```

Expand Down
15 changes: 15 additions & 0 deletions git_remote_s3/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,18 @@ def parse_git_url(url: str) -> tuple[UriScheme, str, str, str]:
uri_scheme = UriScheme.S3_ZIP

return uri_scheme, profile, bucket, prefix


LFS_ALIAS_HOST = "lfs-alias.git-remote-s3.test"


def synthetic_lfs_url(bucket: str, prefix: str) -> str:
    """Return the synthetic LFS endpoint URL for ``bucket`` and ``prefix``.

    The result has the shape ``https://<LFS_ALIAS_HOST>/<bucket>/<prefix>``.
    It is never contacted: it only serves as a stable match key so that
    ``lfs.<url>.standalonetransferagent`` can be scoped to a single remote,
    and so that git-lfs resolves an HTTPS-shaped endpoint instead of
    attempting SSH-style discovery for ``s3://`` URLs. The host lives under
    the RFC 6761-reserved ``.test`` TLD so it cannot collide with a real
    host.
    """
    template = "https://{}/{}/{}"
    return template.format(LFS_ALIAS_HOST, bucket, prefix)
147 changes: 136 additions & 11 deletions git_remote_s3/lfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import boto3
import threading
import os
from .common import parse_git_url
from typing import Optional
from .common import parse_git_url, synthetic_lfs_url
from .git import validate_ref_name

if "lfs" in __name__:
Expand Down Expand Up @@ -131,32 +132,156 @@ def download(self, event: dict):
sys.stdout.flush()


def install():
result = subprocess.run(
["git", "config", "--add", "lfs.customtransfer.git-lfs-s3.path", "git-lfs-s3"],
def _git_config_get(key: str) -> Optional[str]:
    """Return the current value of a git config key, or None if unset.

    Runs ``git config --get <key>`` in the current working directory.

    Args:
        key: Fully qualified git config key, e.g. ``remote.origin.lfsurl``.

    Returns:
        The value printed by git, decoded as UTF-8 with surrounding
        whitespace stripped, or None when git exits with a non-zero status.
    """
    res = subprocess.run(
        ["git", "config", "--get", key],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Any non-zero exit status is reported to the caller as "no value".
    if res.returncode != 0:
        return None
    return res.stdout.decode("utf-8").strip()


def _git_config_set(key: str, value: str) -> None:
    """Write ``value`` to the git config entry ``key``.

    Invokes ``git config --replace-all`` so that any values already stored
    under ``key`` are replaced by ``value``. When git exits with a non-zero
    status, its stderr output is echoed to this process's stderr and the
    process exits with status 1.
    """
    command = ["git", "config", "--replace-all", key, value]
    proc = subprocess.run(command, stderr=subprocess.PIPE)
    if proc.returncode == 0:
        return

    git_error = proc.stderr.decode("utf-8").strip()
    sys.stderr.write(git_error + "\n")
    sys.stderr.flush()
    sys.exit(1)
result = subprocess.run(
["git", "config", "--add", "lfs.standalonetransferagent", "git-lfs-s3"],


def _list_git_remotes() -> list:
    """Return the names printed by ``git remote``.

    Blank lines in git's output are dropped. An empty list is returned when
    git exits with a non-zero status.
    """
    proc = subprocess.run(
        ["git", "remote"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if proc.returncode != 0:
        return []

    names = []
    for line in proc.stdout.decode("utf-8").splitlines():
        # Skip whitespace-only lines; keep every other line as printed.
        if line.strip():
            names.append(line)
    return names


def _resolve_s3_remote(remote_name: str) -> tuple:
    """Validate that ``remote_name`` exists and points at an S3 URL.

    Looks the remote up with ``git remote get-url`` and parses the URL with
    ``parse_git_url``.

    Args:
        remote_name: Name of the git remote to inspect.

    Returns:
        A ``(bucket, prefix)`` tuple taken from the parsed remote URL.

    Exits:
        Writes an error message to stderr and exits with status 1 when the
        remote is not configured, or when the parsed URL yields no bucket
        or no prefix.
    """
    res = subprocess.run(
        ["git", "remote", "get-url", remote_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if res.returncode != 0:
        sys.stderr.write(
            f"error: remote '{remote_name}' is not configured. "
            f"Add it first with: "
            f"git remote add {remote_name} s3://<bucket>/<prefix>\n"
        )
        sys.stderr.flush()
        sys.exit(1)
    url = res.stdout.decode("utf-8").strip()
    # Only the bucket and prefix are needed; scheme and profile are ignored.
    _, _, bucket, prefix = parse_git_url(url)
    if bucket is None or prefix is None:
        sys.stderr.write(
            f"error: remote '{remote_name}' has URL '{url}', which is not "
            f"an s3:// or s3+zip:// URL. --remote can only scope LFS "
            f"configuration for S3 remotes.\n"
        )
        sys.stderr.flush()
        sys.exit(1)
    return bucket, prefix


def install(*, remote_name: Optional[str] = None) -> None:
    """Install git-lfs-s3 as a custom LFS transfer agent.

    When ``remote_name`` is given, per-remote scoped configuration is
    written so the agent only fires for that remote, which is what allows
    coexistence with non-S3 LFS remotes. When it is None, the unscoped
    (back-compat) configuration is written, which applies to every remote
    in the repo.
    """
    if remote_name is not None:
        _install_scoped(remote_name)
        return
    _install_unscoped()


def _install_unscoped() -> None:
    """Write the unscoped git-lfs-s3 configuration for the current repo.

    Emits a warning on stderr when more than one remote is configured,
    because the unscoped agent setting applies to all of them, then sets
    the custom transfer path and the standalone transfer agent and reports
    success on stdout.
    """
    configured = _list_git_remotes()
    if len(configured) > 1:
        names = ", ".join(configured)
        sys.stderr.write(
            f"warning: multiple remotes configured ({names}); "
            "'git-lfs-s3 install' writes unscoped configuration that "
            "applies to ALL remotes. If any non-S3 remote uses LFS, "
            "push/pull may fail. Use 'git-lfs-s3 install --remote <name>' "
            "to scope to a single S3 remote.\n"
        )
        sys.stderr.flush()

    # Order matters only in that it mirrors the documented command sequence.
    for config_key in (
        "lfs.customtransfer.git-lfs-s3.path",
        "lfs.standalonetransferagent",
    ):
        _git_config_set(config_key, "git-lfs-s3")

    sys.stdout.write("git-lfs-s3 installed\n")
    sys.stdout.flush()


def _install_scoped(remote_name: str) -> None:
    """Write git-lfs-s3 configuration scoped to the remote ``remote_name``.

    Resolves the remote to its bucket and prefix, derives the synthetic LFS
    alias URL, and then:

    * exits with status 1 if ``remote.<name>.lfsurl`` is already set to a
      different URL (an existing LFS URL is never overwritten);
    * warns on stderr if an unscoped ``lfs.standalonetransferagent`` is
      set, since that defeats per-remote scoping;
    * sets the custom transfer path, the remote's ``lfsurl`` and the
      URL-scoped standalone transfer agent, then reports success on stdout.
    """
    bucket, prefix = _resolve_s3_remote(remote_name)
    alias_url = synthetic_lfs_url(bucket, prefix)
    lfsurl_key = f"remote.{remote_name}.lfsurl"

    current_lfsurl = _git_config_get(lfsurl_key)
    if current_lfsurl not in (None, alias_url):
        sys.stderr.write(
            f"error: {lfsurl_key} is already set to "
            f"'{current_lfsurl}'. git-lfs-s3 will not overwrite an "
            f"existing LFS URL. If this was set in error, unset it with:\n"
            f" git config --unset {lfsurl_key}\n"
        )
        sys.stderr.flush()
        sys.exit(1)

    unscoped_agent = _git_config_get("lfs.standalonetransferagent")
    if unscoped_agent is not None:
        sys.stderr.write(
            "warning: lfs.standalonetransferagent is set unscoped; this "
            "applies git-lfs-s3 to ALL remotes and will defeat per-remote "
            "scoping. Unset it with:\n"
            " git config --unset lfs.standalonetransferagent\n"
        )
        sys.stderr.flush()

    settings = (
        ("lfs.customtransfer.git-lfs-s3.path", "git-lfs-s3"),
        (lfsurl_key, alias_url),
        (f"lfs.{alias_url}.standalonetransferagent", "git-lfs-s3"),
    )
    for config_key, config_value in settings:
        _git_config_set(config_key, config_value)

    sys.stdout.write(
        f"git-lfs-s3 installed for remote '{remote_name}' "
        f"(LFS alias: {alias_url})\n"
    )
    sys.stdout.flush()


def main(): # noqa: C901
if len(sys.argv) > 1:
if "install" == sys.argv[1]:
install()
remote_name: Optional[str] = None
args = sys.argv[2:]
i = 0
while i < len(args):
if args[i] == "--remote":
if i + 1 >= len(args):
sys.stderr.write("error: --remote requires a value\n")
sys.stderr.flush()
sys.exit(2)
remote_name = args[i + 1]
i += 2
else:
sys.stderr.write(f"error: unknown argument to install: {args[i]}\n")
sys.stderr.flush()
sys.exit(2)
install(remote_name=remote_name)
sys.exit(0)
elif "debug" == sys.argv[1]:
logger.setLevel(logging.DEBUG)
Expand Down
Loading