From 988ea2511b3599449e1497a28a1db4442ef816d9 Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Fri, 12 Jun 2026 03:03:11 -0600 Subject: [PATCH 1/2] update todo --- dev/TODO.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dev/TODO.md b/dev/TODO.md index 4870fd7..5ba9ad8 100644 --- a/dev/TODO.md +++ b/dev/TODO.md @@ -1,5 +1,13 @@ # TODO +## In planning: file handling + observability redesign + +Full spec in [PLAN.md](./PLAN.md): `RunPaths` + `--artifacts-dir` + +`--no-files` (Track A), `EventSink` + module guest-mode logging + OTel +wide events (Track B), across src-py-lib and src-auth-perms-sync. +Not started; begin with Track A Phase A0 characterization tests on a +clean worktree off origin/main. + ## High priority: Remote trigger on demand - Sourcegraph webhook for new user coming in v7.4.0 @@ -13,18 +21,11 @@ - How do we avoid stampedes (e.g., bulk repo sync triggering thousands of re-runs)? -## High priority: Reduce worst-case full-permission sync load +## Medium priority: Reduce worst-case full-permission sync load - Use the stress-run evidence in [engineering-requests.md](./engineering-requests.md) to request Sourcegraph bulk explicit-permission read and write APIs. - 2026-06-12: presence-probe resolver internals and measured costs added - there (see "Presence-check resolver internals"); request is ready to - submit. Client-side, `set --users-without-explicit-perms` now matches - rules before probing and hydrates users in aliased batches (5,210s → - 15s on the 10k-user instance), but `get --users-without-explicit-perms` - still probes every active user — only a server-side presence/filter API - fixes that. New evidence 2026-06-10: the whole-instance apply (1,150 repo overwrites x 10,002 bindIDs each at parallelism 16) crashed the test instance's Postgres ("connection refused", "unexpected EOF"); the From af36b9e8b1e82d859e5ff89528b37b20533c35fd Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Fri, 12 Jun 2026 04:07:16 -0600 Subject: [PATCH 2/2] Resolve run paths once at the edge; add artifacts-dir, no-files, result objects Implements dev/PLAN.md Track A (file handling) and Track B consumer wiring (B5), against src-py-lib v0.3.0. - shared/backups.py: frozen RunPaths resolved once per run (resolve_run_paths); absolute paths, exclusive run-directory creation with collision suffixes; artifact_path() family naming; the path ContextVars and cwd fallbacks are gone - New flags: --artifacts-dir (root of the artifacts tree, default unchanged) and --no-files (write nothing; with --apply also requires --no-backup so giving up restore is stated explicitly twice) - get now accepts --maps-path (symmetric with set) - Org-sync snapshots renamed to saml-organizations-{before,after,diff}.json so combined set --sync-saml-orgs runs cannot collide with permission snapshots - Module API: Get/Set/Restore/SyncSamlOrgs return result dataclasses (GetResult carries the auth provider and code host dicts in memory); optional event_sink receives structured wide events; module mode never touches stdlib logging handlers (CLI mode is selected by main() only) - log.json is now OTel Logs Data Model JSONL via src-py-lib EventSink - Tests: RunPaths unit coverage incl. path-escape safety, --no-files rejection matrix, module-mode isolation tests, no-files dry-run parity with files-enabled, fixture harness routed through temp artifact dirs Verification: tests/run.py 102/102 fast, 120/120 --live against the sgdev test instance (apply + restore cycles with GraphQL read-back, wheel install smoke against PyPI src-py-lib 0.3.0) Amp-Thread-ID: https://ampcode.com/threads/T-019eba67-c19d-7166-9690-dcb2f0eed165 Co-authored-by: Amp --- AGENTS.md | 10 +- README.md | 45 +- dev/PLAN.md | 458 ++++++++++++++++++ dev/TODO.md | 16 +- pyproject.toml | 2 +- src/src_auth_perms_sync/__init__.py | 29 +- src/src_auth_perms_sync/cli.py | 336 +++++++++---- src/src_auth_perms_sync/orgs/sync.py | 89 ++-- .../permissions/command.py | 260 +++++----- .../permissions/full_set.py | 274 ++++------- .../permissions/restore.py | 239 ++++----- .../permissions/workflow.py | 77 +-- src/src_auth_perms_sync/shared/backups.py | 155 +++--- src/src_auth_perms_sync/shared/run_context.py | 12 +- tests/e2e/case_runner.py | 73 ++- tests/e2e/test_local_cases.py | 25 + tests/integration/test_cli_entrypoint.py | 26 +- tests/run.py | 76 +-- tests/unit/test_backups.py | 318 ++++++++++-- tests/unit/test_cli_config.py | 232 ++++++--- tests/unit/test_command_additive.py | 78 +-- tests/unit/test_command_files.py | 241 +++++++++ tests/unit/test_module_api.py | 119 +++++ tests/unit/test_snapshot.py | 27 +- uv.lock | 8 +- 25 files changed, 2273 insertions(+), 952 deletions(-) create mode 100644 dev/PLAN.md create mode 100644 tests/unit/test_command_files.py create mode 100644 tests/unit/test_module_api.py diff --git a/AGENTS.md b/AGENTS.md index ca79da4..a51e99d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -201,10 +201,16 @@ organization sync maps SAML groups to Sourcegraph org membership. Read CLI lives in `src/src_auth_perms_sync/`; invoke with `uv run src-auth-perms-sync`. Strict pyright covers the package. Root modules are entrypoints only: -- `cli.py` — `main()`, arg parsing, owns the CLI description. +- `cli.py` — `main()`, arg parsing, owns the CLI description. Module + wrappers (`Get`/`Set`/`Restore`/`SyncSamlOrgs`) return result dataclasses + and never install logging handlers; only `main()` runs CLI-mode logging. - `shared/` — cross-workflow helpers: Sourcegraph auth-provider/user list helpers, shared GraphQL operations and TypedDicts, site-config validation, - and SAML group parsing. + and SAML group parsing. `shared/backups.py` defines `RunPaths`: every + filesystem path for one run, resolved once at the edge + (`resolve_run_paths`) and threaded explicitly — never recompute paths + from cwd or globals below the edge, and honor `run_paths.write_files` + (False under `--no-files`) before any disk write. Business workflows live in packages: diff --git a/README.md b/README.md index 49fbfdd..914d631 100644 --- a/README.md +++ b/README.md @@ -137,14 +137,44 @@ config = src.Config( apply=False, # Dry run (default), set to True to make changes ) -succeeded = src.Set(config) +result = src.Set(config) # truthy on success; result.paths has run artifacts + +# Discovery returns the auth provider and code host data in memory, so you +# can assemble mapping rules without re-parsing the generated YAML files: +get_result = src.Get(config) +for provider in get_result.auth_providers: + ... +for code_host in get_result.code_hosts: + ... # Other command wrappers: -# succeeded = src.Get(config) -# succeeded = src.Restore(config) -# succeeded = src.SyncSamlOrgs(config) +# result = src.Restore(config) +# result = src.SyncSamlOrgs(config) +``` + +Module mode never touches your `logging` handlers or the root logger — your +application's logging config stays in charge. To see progress messages: + +```python +import logging + +logging.basicConfig(level=logging.INFO) # or your own handlers +logging.getLogger("src_auth_perms_sync").setLevel(logging.INFO) +logging.getLogger("src_py_lib").setLevel(logging.INFO) ``` +To receive structured wide events programmatically, pass an event sink: + +```python +events = src.InMemoryEventSink() +src.Get(config, event_sink=events) # or src.CallbackEventSink(my_function) +``` + +To run fully disk-free (no generated YAML, snapshots, or log file), set +`no_files=True`. Combined with `apply=True` this also requires +`no_backup=True`, because skipping files gives up the before/after +snapshots that make `--apply` reversible. + ## Inputs - Environment variables (CLI), or src.Config args (Python import) @@ -154,7 +184,12 @@ succeeded = src.Set(config) - YAML maps file - By default: `src-auth-perms-sync-runs//maps.yaml` - - Or pass `--maps-path ./path/to/maps.yaml` + - Or pass `--maps-path ./path/to/maps.yaml` (works for both `get` and `set`, + so the maps file can live outside the generated artifacts tree) + - `--artifacts-dir DIR` moves the whole artifacts tree (generated YAML, + snapshots, logs); the default is `./src-auth-perms-sync-runs` + - `--no-files` writes nothing to disk; with `--apply` it also requires + `--no-backup` - A list of mapping rules - Each mapping rule takes - A map of filters for users diff --git a/dev/PLAN.md b/dev/PLAN.md new file mode 100644 index 0000000..d7eb5de --- /dev/null +++ b/dev/PLAN.md @@ -0,0 +1,458 @@ +# PLAN: file handling + observability redesign + +> **Status (2026-06-12): implemented**, except Track A Phase A4 +> (in-memory mapping rules for `Set`), which the plan marks optional — +> tracked in [TODO.md](./TODO.md). Shipped as src-py-lib v0.3.0 and the +> src-auth-perms-sync `refactor-logging-and-files` PR. Phases were +> compressed into one PR per repo (no ContextVar bridge phase was +> needed); everything else landed as specified. + +Spans both repos (we own both, both greenfield, no external users): + +- `src-py-lib` — reusable plumbing (logging, events, OTel, HTTP, config) +- `src-auth-perms-sync` — consumer; runs as a CLI and as an importable + Python module (`src.Get(config)` etc.) + +Two tracks. Track A fixes how files are handled. Track B fixes how logs and +structured events are handled. They meet where the JSONL event sink takes its +path from `RunPaths`. + +## Goals + +1. Module-import customers get data back in memory, control over (or freedom + from) disk writes, and their own `logging` config left untouched. +2. CLI customers get explicit control over where files land, with default + paths unchanged. +3. Structured events become first-class, OTel-standard wide events instead of + stowaways on the stdlib logging pipe. +4. Net code reduction: delete the path ContextVars, the logging demux filters, + and the magic record attributes. + +## Current state (verified in code, this thread) + +### Files written today + +All output roots at `Path.cwd()/src-auth-perms-sync-runs//` via +`backups.endpoint_artifacts_directory()`. Three species share that tree: + +| Species | Files | Nature | +| --- | --- | --- | +| Human-edited input | `maps.yaml` | `get` creates empty if missing; `set` reads | +| Regenerated reference | `code-hosts.yaml`, `auth-providers.yaml` | overwritten by every `get` | +| Append-only audit | `runs/-/{before,after,diff}.json` + copies | gate reversibility | + +The audit run directory also carries a `maps.yaml` copy and `log.json`. + +Path plumbing: `cli.py:_run_or_raise` computes a run directory, then sets two +ContextVars (`backups.run_artifacts_context`). Deep code in +`permissions/restore.py`, `permissions/full_set.py`, `orgs/sync.py` calls +`backups.backup_timestamp()` / `backups.backup_path(name, ts, endpoint, cmd, +state)`, which read the ContextVars or silently recompute from cwd. + +### What `--no-backup` does and does not do + +`--no-backup` skips only the reversibility artifacts (before/after snapshots, +maps.yaml backup copy). Even with it set, the tool still writes: + +- `maps.yaml` creation (`run_get`, unconditional) +- `code-hosts.yaml` / `auth-providers.yaml` (`cmd_get`, unconditional) +- `log.json` (`_run_or_raise` always passes a log file path) +- dry-run snapshots/diffs — `full_set.py` gates on + `if not (dry_run or do_backup)`, so dry runs write regardless, because the + diff is the dry run's output + +### Logging today + +- Two record species ride one stdlib logging pipe: human messages + (`log.info(...)` on module loggers) and structured events + (`span()`/`log_event()`), the latter with message `event=%s` and the payload + smuggled in record attrs `_src_py_lib_structured_event` / + `_src_py_lib_structured_fields`, emitted on the ROOT logger + (`logger_name=""` default). +- Demux at sinks: terminal handler carries `_DropStructuredEvents`; + `JSONLogFileHandler` knows the magic attrs and renders both species into + `log.json` (human records become `{"event": "log", ...}`). +- OTel spans open in parallel inside `span()`; OTel SDK is a hard dependency. + +### Bugs / hazards found during investigation + +1. **Module import nukes customer logging.** `configure_logging()` runs on + every `src.Get(config)` call and, with `logger_name=""`, clears the ROOT + logger's handlers and sets `propagate = False`. A host application's + logging config is destroyed. +2. **Snapshot filename collision risk.** In a combined `set + --sync-saml-orgs` run, permission snapshots and org snapshots both resolve + to `before.json` in the same run directory (`backup_path` ignores + `source_name` when `state` is given). +3. **Run-directory collision.** Timestamps are second-precision; concurrent + module calls or rapid CLI invocations can collide. +4. **HTTP metric reset misplaced.** `reset_observability_metrics()` lives in + `configure_logging()`; skip handler config and a long-running script's + second run reports cumulative counters. +5. **Audit events ride logger levels.** A host that sets root level WARNING + silently drops our info-level audit events. + +## Hard invariants (do not break; see AGENTS.md) + +All four AGENTS.md invariants apply. The one this plan touches most is +invariant 3 — snapshots gate reversibility: `--apply` defaults to +before/after snapshots; `--no-backup` stays an explicit escape hatch, never +the default. + +New corollary adopted in this plan: any flag combination that removes the +undo path must be stated explicitly twice (`--no-files` + `--apply` requires +`--no-backup`). + +--- + +## Track A: file handling (`RunPaths`) + +### Design decisions + +- **One knob**: `artifacts_dir` Config field, CLI `--artifacts-dir`, env + `SRC_AUTH_PERMS_SYNC_ARTIFACTS_DIR`. Semantics: the directory that contains + endpoint subdirectories. Default `./src-auth-perms-sync-runs` — existing + default paths do not move. Resolve to absolute once at startup + (`Path.resolve(strict=False)`) so later `os.chdir()` in a host script + cannot redirect writes. No XDG / platformdirs: these are audit artifacts in + secured offline environments; explicit operator paths beat OS conventions. +- **One value object**: frozen `RunPaths`, built once at the edge + (`_run_or_raise`) by `resolve_run_paths(config, endpoint, command)`: + + ```python + @dataclass(frozen=True) + class RunPaths: + timestamp: str + artifacts_dir: Path # resolved artifacts root + endpoint_directory: Path # artifacts_dir / + maps_path: Path # respects --maps-path override + code_hosts_path: Path + auth_providers_path: Path + run_directory: Path # endpoint_directory / runs / - + log_path: Path + ``` + + Threaded explicitly down `run_command` → `cmd_*` → workflow helpers. Deep + code receives concrete `Path`s; nothing recomputes from + `(timestamp, endpoint, command)`. +- **`maps.yaml` default stays endpoint-scoped.** Endpoint scoping prevents + applying one instance's mappings to another; a silent default move creates + two plausible maps files. Instead make `--maps-path` symmetric: `get` gains + the flag (today only `set` has it), so `get --maps-path ./maps.yaml` + creates/leaves that file and `set --maps-path ./maps.yaml` reads it. +- **Run directories are created exclusively**; on collision append a numeric + suffix (`...-set-apply-2`). Never overwrite run artifacts. +- **Artifact families are named.** Permission snapshots keep `before.json` / + `after.json` / `diff.json`; org-sync snapshots become + `saml-organizations-before.json` etc., so combined runs cannot collide. +- **Writability preflight.** Apply-mode commands resolve paths, create the + run directory, and verify the snapshot destination is writable BEFORE the + first mutation. Never mutate first and discover backups can't be written. +- **`no_files` knob** (CLI `--no-files`, Config field): suppress all disk + output. Distinct axis from `no_backup` (reversibility) — keep both flags. + - Guardrail 1: `no_files=True` + `apply=True` is a config error unless + `no_backup=True` is also set (give up the undo button explicitly, twice). + - Guardrail 2: `no_files` governs output only; `set` still reads its + `maps_path` input (until Track A Phase 4 in-memory maps). + - `no_files` implies `log_file=None` (lib already supports no file handler) + and no JSONL event sink. + - For CLI dry runs `--no-files` discards `diff.json` — operator's explicit + choice; the real audience is module customers, but offer uniformly. +- **Result objects.** Command wrappers return dataclasses; `__bool__` returns + `succeeded` so existing `if src.Get(config):` keeps working. Note + `__bool__` is only partial compat (`Get(c) is True` breaks); acceptable — + no external users yet. `GetResult` carries the same dicts dumped to YAML + (`cmd_get` already has them in memory; thread out via + `run_context.CommandData`): + + ```python + @dataclass(frozen=True) + class GetResult: + succeeded: bool + auth_providers: list[dict[str, Any]] # as written to auth-providers.yaml + code_hosts: list[dict[str, Any]] # as written to code-hosts.yaml + maps_path: Path + maps_created: bool + paths: RunPaths | None + + def __bool__(self) -> bool: ... + ``` + + Same pattern for `SetResult` (planned/applied diff), `RestoreResult`, + `SyncSamlOrgsResult`. Export the path helpers (`default_maps_path`, + `endpoint_artifacts_directory`) from the package `__init__`. + +### Track A phases (each ships independently) + +- **A0 — characterization tests first.** Pin current behavior before + touching paths: default `get` output locations; default `set` maps path; + relative `--maps-path` is cwd-relative; `--no-backup` opt-in only; apply + fails before mutation when artifact paths are unwritable; combined + `set --sync-saml-orgs` snapshot naming (exposes the collision). +- **A1 — `artifacts_dir` + `RunPaths` at the edge.** Add the config field, + `resolve_run_paths()`, exclusive run-dir creation, `no_files` knob with + guardrails, `--maps-path` on `get`. Keep the two ContextVars as a + temporary bridge (`run_artifacts_context(paths.run_directory, ...)`). QoL + lands immediately; refactor risk stays low. +- **A2 — thread `RunPaths` down; delete the ContextVars.** Change + signatures: `run_command(..., paths)` → `run_get/run_set/run_restore/ + run_sync_saml_organizations(..., paths)` → workflow helpers take concrete + paths (`write_maps_backup(paths)`, `write_snapshot_pair(paths, ...)`). + Apply artifact-family naming. When no deep code calls `backup_path()` / + `backup_timestamp()`, delete `run_artifacts_context`, both ContextVars, + and the cwd fallback. Net code reduction. +- **A3 — result dataclasses + exported helpers.** As specified above. CLI + ignores the result; module callers stop re-parsing YAML the library just + wrote. +- **A4 — in-memory mapping rules for `Set` (optional, later).** Module + customers pass parsed mapping rules instead of a maps file; the full + get → assemble → dry-run set loop never touches disk. Snapshots still go + to disk on `--apply` unless `no_files` + `no_backup` are both explicit. + +--- + +## Track B: observability (`EventSink` + OTel wide events) + +### Design decisions: three concerns, three channels + +```text +Human messages Structured events Tracing +log.info("Wrote %s") span() / log_event() OTel spans + │ │ │ + ▼ ▼ │ +stdlib logging EventSink (explicit, per run) │ +module loggers only ├ JSONLEventSink → log.json │ + │ ├ InMemoryEventSink (tests, │ + │ │ module customers) │ + │ ├ CallbackEventSink │ + │ ├ OtelLogsSink (--otel) │ + │ └ NullEventSink (default) │ +CLI mode only: ▲ │ +terminal handler EventBridgeHandler forwards human │ +on package loggers, log records INTO the sink so │ +never root log.json keeps everything │ +``` + +- **Bridge direction reversed.** Today rich events are flattened onto the + logging pipe (magic attrs) and unpacked at the file handler. Instead: + events go to the sink directly as dicts; ONE `EventBridgeHandler` + (CLI-installed) wraps lossy human log records into `{"event_name": "log"}` + events. The adapter direction matches data richness. +- **Entrypoint is the mode signal — no `manage_logging` knob.** + `main()` runs CLI mode (terminal handler + bridge + JSONL sink, handlers + attached only to `("src_auth_perms_sync", "src_py_lib")`, never root, no + `handlers.clear()`, no `propagate` changes on host loggers). + `Get/Set/Restore/SyncSamlOrgs` run guest mode: no handler changes + anywhere, `NullEventSink` default, optional caller-provided sink. Standard + etiquette `logging.NullHandler()` on the package loggers. +- **Sink resolution via ContextVar (`EventRuntime`).** One deliberate + exception to the "kill ContextVars" rule: the event runtime + (`run`, `sink`, `min_level`, `base_fields`) is genuinely ambient execution + context, like the current OTel span. Guardrails: only + `observability_context()` sets it; default is `NullEventSink`; thread + pools keep using `contextvars.copy_context()` via + `submit_with_log_context`; no import-time side effects. + (`RunPaths` stays explicitly threaded — paths are not ambient.) +- **`span()` / `log_event()` / `event()` / `stage()` signatures unchanged.** + Only the backend moves; consumer call sites in src-auth-perms-sync are + untouched. +- **Split `logging_context()`** (currently does too much) into: + - `observability_context(name, *, sink, run, run_fields, open_telemetry, + resource_sample_interval_seconds, ...)` — sets the EventRuntime, OTel + only if explicitly requested, run start/end events, resource sampler, + metric reset (moved here from `configure_logging`), sink lifecycle. + - `cli_logging_handlers(*, sink, run, logger_names, terminal_level, + audit_log_level)` — adds/removes only its own handlers. +- **Teardown ordering** (baked into one context manager): stop resource + sampler → wait for worker threads → emit run-end event → flush/close JSONL + sink → force-flush OTel → remove CLI handlers. Run-end event must land + even on `SystemExit` / exceptions; OTel flush failure must not prevent + JSON flush; no logging from inside sink error handling (recursion). +- **OTel global provider hygiene.** Never configure OTel in guest mode + unless the caller asks; if the host already configured OTel, `span()` + uses it. Add an `owned` flag to `OpenTelemetryRuntime`; only force-flush + providers this run configured. +- **Level policy.** Audit completeness is governed by the sink, not logger + levels: `log.json` defaults to complete (debug); `--quiet` affects + terminal only. Lifecycle events (run start/end, startup config snapshot, + errors) are always emitted. +- **Thread safety.** JSONL sink locks around writes; copy event dicts before + customer callbacks; worker pools complete inside the observability + context; document that `ProcessPoolExecutor` does not inherit context. + +### OTel standards adoption (decided inside B1/B2, not a separate phase) + +- **Event schema = OTel Logs Data Model field names:** + + | Today | OTel standard | + | --- | --- | + | `ts` (string) | `time_unix_nano` | + | `level` | `severity_text` + `severity_number` (INFO=9, WARN=13, ERROR=17) | + | `event` key | `event_name` | + | `message` | `body` | + | `trace` / `span` / `parent_span` | `trace_id` / `span_id` (hex from our OTel spans) | + | other fields | `attributes` | + | run-level fields | `resource` attributes, stamped once per run | + +- **Semantic conventions for attribute names**, pinned in one + `semconv.py` constants module (semconv is versioned and partly + experimental; an upstream rename must be a one-file change): + `error_type` → `error.type`; pid → `process.pid`; Python version → + `process.runtime.version`; `service.name` / `service.version`; HTTP + run-summary counters aligned to `http.client.*`. App-specific fields get + our own namespace so they can never collide: `sync.command`, + `sync.endpoint`, `sync.run.id`, `sync.parallelism`, etc. +- **Wide-event discipline: one event per unit of work.** The span-END event + is THE wide event (the codebase already accumulates attributes onto + `cmd_event[...]` during work — keep that pattern). Demote span-START + events to debug; demote run-start once `startup_event` carries the config + snapshot. `log.json` gets ~half the lines and every line is worth reading. +- **File format: flat JSONL with OTel field names** (human-greppable, + audit-friendly, one trivial transform from OTLP). NOT per-line OTLP/JSON + envelopes (`resourceLogs[...]` — three envelopes deep, miserable to audit + by eye). True-OTLP interop is a separate sink: when `--otel` is on, + `OtelLogsSink` emits wide events through the OTel Logs SDK + (`LoggerProvider` → OTLP/HTTP) — already a dependency. Offline customers + keep the readable file; connected ones get standard wire logs correlated + with our traces via `trace_id`/`span_id`. + +### Custom logic carry-over inventory (audited 2026-06-12) + +Existing custom logic in `src_py_lib/utils/logging.py` and its fate. Most +survives because `span()` / `log_event()` / context machinery keep their +signatures; four items live in code Track B deletes and MUST be carried over +explicitly: + +1. **HTTP header secret redaction** (`_http_headers` + + `SECRET_FIELD_FRAGMENTS`): scrubs authorization/cookie/token headers + before they reach `log.json`. Lives in the handler path B6 deletes. Move + the redaction (and `_is_sensitive_log_field`) into the bridge/mining path. + Losing this writes bearer tokens to disk during wire debugging. +2. **httpcore/httpx wire-debug mining** (`_structured_log_fields`): + `ast.literal_eval`s httpcore debug messages into structured + `status_code` / `http_version` / redacted-header fields; demotes httpx + "HTTP Request:" lines to debug. The bridge attaches to package loggers + only, so httpx/httpcore records never reach the sink — the opt-in + wire-debug mode (`suppress_http_dependency_logs=False`) would silently + die. Carry-over: when suppression is disabled, also attach the bridge to + `("httpx", "httpcore")` and keep the mining + redaction in the bridge. +3. **Log-file retention** (`_prune_old_log_files`, `retain_log_files`): + inert for src-auth-perms-sync (per-run directories don't match the + prune glob) but active for other src-py-lib consumers using the default + `logs/` dir. Keep pruning where the sink's file is created + (`logs_dir`-style construction helper), not in the sink itself. +4. **`exc_info` traceback formatting and field ordering** + (`_ordered_payload`, `LOG_FIELD_ORDER`): move traceback rendering into + `EventBridgeHandler`; move ordering into the JSONL sink, updated to the + OTel field names (`time_unix_nano`, `severity_text`, `event_name`, + `trace_id`, `span_id` first, attributes alphabetical). + +Verified safe without action (mechanism unchanged by the plan): +`log_context`/`stage` inherited fields; `submit_with_log_context`; +trace-field stamping; HTTP metric counters wired into `HTTPClient`; +`ResourceSampler` (emits via `log_event`, so it follows the sink); +`startup_event` git hash + `sanitized_config_snapshot` (incl. `op://` → +"reference" secret-state detection); run-end `SystemExit(0)`/exit-code +semantics; `-v/-q/-s` alias validation and level parsing; OTel span +attributes/status and traceparent propagation for `fetch_sg_traces`. + +`_DropHTTPDependencyLogs` on the terminal handler becomes unnecessary by +construction (handlers sit on package loggers, so httpx/httpcore noise +never reaches them) — delete it; behavior is preserved. + +### Track B steps (lib first, consumer second) + +- **B1 — sinks beside the old code (src-py-lib).** `EventSink` protocol; + `NullEventSink`, `JSONLEventSink` (locked writes, flush/close), + `CompositeEventSink`, `InMemoryEventSink`, `CallbackEventSink`; + `EventRuntime` ContextVar; `semconv.py`; tests for JSONL output shape + (OTel field names, severity numbers, resource stamping). +- **B2 — move `log_event()` off stdlib logging (src-py-lib).** Build the + OTel-shaped payload, emit to the current sink, keep OTel span-event + emission; delete the `LogRecord.extra` magic-attr path. Apply wide-event + discipline (start events → debug). +- **B3 — `EventBridgeHandler` (src-py-lib).** Human records → sink as + `event_name="log"` events. CLI-installed only, attached to + `("src_auth_perms_sync", "src_py_lib")`. +- **B4 — split `logging_context()` (src-py-lib).** Into + `observability_context()` and `cli_logging_handlers()` as above; move + `reset_observability_metrics()` into `observability_context()`; OTel + `owned` flag. +- **B5 — consumer wiring (src-auth-perms-sync).** `_run_or_raise(..., + cli_mode: bool)`; `main()` passes `cli_mode=True` (JSONL sink at + `RunPaths.log_path`, terminal + bridge handlers); `Get/Set/Restore/ + SyncSamlOrgs` pass `cli_mode=False` (guest mode; optional `event_sink` + parameter for customers who want events programmatically). README gains a + module-logging snippet (root WARNING default means a long sync looks + silent — show `logging.getLogger("src_auth_perms_sync")` setup). +- **B6 — delete the old demux (src-py-lib).** Remove + `_DropStructuredEvents`, `_STRUCTURED_EVENT_ATTR`, + `_STRUCTURED_FIELDS_ATTR`, the structured branch in `JSONLogFileHandler` + (or the whole handler, replaced by the sink), and the destructive root + `handlers.clear()`. Regression tests: CLI produces `log.json`; + `log.info()` appears as `event_name="log"`; structured events appear with + full attributes; module `Get(config)` does not mutate root handlers or + levels; thread-pool events retain run/context fields; run-end event + appears on exceptions; second module run in one process reports per-run + (not cumulative) HTTP counters. + +--- + +## Track interaction points + +- `JSONLEventSink` path comes from `RunPaths.log_path` (A1 must land before + B5's consumer wiring, or B5 temporarily uses the current log path helper). +- `no_files` (A1) disables the JSONL sink and the log file; guest mode (B5) + defaults to no file output anyway — `no_files` mainly matters for CLI and + for module callers who set `artifacts_dir`. +- Result dataclasses (A3) and `InMemoryEventSink` / `CallbackEventSink` (B1) + together complete the module story: data back in memory, events back in + memory, zero disk unless asked (and snapshots on `--apply`, always, unless + doubly opted out). + +## Verification + +- `uv run tests/run.py` (lint, format, pyright, unit + fixture tests, CLI + rejection matrix, randomized permission invariants) after every phase. +- `uv run tests/run.py --live` for mutation-path phases (A2 especially), with + before/after snapshot inspection under `src-auth-perms-sync-runs/`. +- A0 characterization tests are the safety net for every later phase: default + paths must not move. +- New fixture cases for: `--artifacts-dir`, `get --maps-path`, `--no-files` + rejection matrix (`--no-files --apply` without `--no-backup` is an error), + combined-run snapshot naming, log.json schema goldens. +- Per AGENTS.md: dry-run against the .env test instance and read the planned + changes before any `--apply`; inspect snapshots afterward. + +## Explicitly out of scope (revisit triggers noted) + +- **Moving the `maps.yaml` default** out of the artifacts tree — revisit only + if compatibility constraints loosen; `--maps-path` symmetry covers the need. +- **XDG / platformdirs** — explicit flag + env is correct for secured offline + environments. +- **`QueueEventSink` / async writer thread** — only if event volume, slow + customer callbacks, or OTLP backpressure measurably hurt sync runs. +- **Artifact manifest / `ArtifactStore` abstraction** — only if artifact + families multiply beyond the current three. +- **Strict per-line OTLP/JSON file format** — only if a customer needs + collector `otlpjsonfile` ingestion of `log.json` as-is; ship as a sink + variant, not a default. + +## Sequencing summary + +```text +A0 characterization tests +A1 artifacts_dir + RunPaths (bridge) + no_files + get --maps-path +B1 sinks + OTel schema + semconv (src-py-lib, parallel with A1/A2) +B2 log_event → sink, wide events +A2 thread RunPaths, delete ContextVars +B3 bridge handler +B4 observability_context split +B5 consumer cli_mode wiring (needs A1 for RunPaths.log_path) +A3 result dataclasses + exports +B6 delete demux + regression tests +A4 in-memory maps for Set (optional, last) +``` + +Each step lands as its own PR from a clean worktree off `origin/main`, lint +and tests green, docs updated. diff --git a/dev/TODO.md b/dev/TODO.md index 5ba9ad8..50f0aa0 100644 --- a/dev/TODO.md +++ b/dev/TODO.md @@ -1,12 +1,14 @@ # TODO -## In planning: file handling + observability redesign - -Full spec in [PLAN.md](./PLAN.md): `RunPaths` + `--artifacts-dir` + -`--no-files` (Track A), `EventSink` + module guest-mode logging + OTel -wide events (Track B), across src-py-lib and src-auth-perms-sync. -Not started; begin with Track A Phase A0 characterization tests on a -clean worktree off origin/main. +## Follow-up: in-memory mapping rules for Set (PLAN.md Track A Phase A4) + +The rest of [PLAN.md](./PLAN.md) is implemented (src-py-lib v0.3.0 + +the consumer refactor-logging-and-files PR). The one deliberately +deferred piece, marked optional in the plan: let module callers pass +parsed mapping rules to `Set` instead of a maps file, so the full +get → assemble → dry-run loop never touches disk. Snapshots must stay +on disk for `--apply` (reversibility invariant); `no_files` + `apply` +must keep requiring `no_backup`. ## High priority: Remote trigger on demand diff --git a/pyproject.toml b/pyproject.toml index acb364f..b65b469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ classifiers = [ dependencies = [ "json5>=0.14.0", "pyyaml>=6.0.3", - "src-py-lib[otel]==0.2.1", + "src-py-lib[otel]==0.3.0", ] keywords = [ "Sourcegraph" diff --git a/src/src_auth_perms_sync/__init__.py b/src/src_auth_perms_sync/__init__.py index 8b16d8e..ba228f8 100644 --- a/src/src_auth_perms_sync/__init__.py +++ b/src/src_auth_perms_sync/__init__.py @@ -1,11 +1,36 @@ -"""Importable API for src-auth-perms-sync.""" +"""Importable API for src-auth-perms-sync. -from .cli import Config, Get, Restore, Set, SyncSamlOrgs +Module-mode commands never touch stdlib logging handlers; configure your own +`logging` handlers and levels (e.g. on the `src_auth_perms_sync` logger) to +see progress messages. Pass an `EventSink` (re-exported from `src_py_lib`) +to receive structured wide events programmatically. +""" + +from src_py_lib import ( + CallbackEventSink, + CompositeEventSink, + EventSink, + InMemoryEventSink, + JSONLEventSink, + NullEventSink, +) + +from .cli import CommandResult, Config, Get, GetResult, Restore, Set, SyncSamlOrgs +from .shared.backups import RunPaths __all__ = [ + "CallbackEventSink", + "CommandResult", + "CompositeEventSink", "Config", + "EventSink", "Get", + "GetResult", + "InMemoryEventSink", + "JSONLEventSink", + "NullEventSink", "Restore", + "RunPaths", "Set", "SyncSamlOrgs", ] diff --git a/src/src_auth_perms_sync/cli.py b/src/src_auth_perms_sync/cli.py index a59ef4c..82f9595 100644 --- a/src/src_auth_perms_sync/cli.py +++ b/src/src_auth_perms_sync/cli.py @@ -10,14 +10,16 @@ from __future__ import annotations import argparse +import contextlib +import dataclasses +import importlib.metadata import logging -import os import sys from collections.abc import Sequence from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path -from typing import Literal, NoReturn, TypeAlias, cast +from typing import Any, Literal, NoReturn, TypeAlias, cast import src_py_lib as src from src_py_lib.utils import config as config_utils @@ -32,7 +34,6 @@ CommandName: TypeAlias = Literal["get", "set", "restore", "sync_saml_orgs"] -DEFAULT_MAPS_FILE_NAME = "maps.yaml" COMMON_CONFIG_FIELDS_BEFORE = src.config_field_names( src.SourcegraphClientConfig, ) @@ -47,6 +48,7 @@ ) GET_CONFIG_FIELDS = src.config_field_names( *COMMON_CONFIG_FIELDS_BEFORE, + "maps_path", "users", "users_without_explicit_perms", "created_after", @@ -54,6 +56,8 @@ "repos_without_explicit_perms", "repos_created_after", "no_backup", + "artifacts_dir", + "no_files", "explicit_permissions_batch_size", *COMMON_CONFIG_FIELDS_AFTER, ) @@ -70,6 +74,8 @@ "sync_saml_organizations", "apply", "no_backup", + "artifacts_dir", + "no_files", "explicit_permissions_batch_size", *COMMON_CONFIG_FIELDS_AFTER, ) @@ -78,6 +84,8 @@ "restore_path", "apply", "no_backup", + "artifacts_dir", + "no_files", "explicit_permissions_batch_size", *COMMON_CONFIG_FIELDS_AFTER, ) @@ -85,6 +93,8 @@ *COMMON_CONFIG_FIELDS_BEFORE, "apply", "no_backup", + "artifacts_dir", + "no_files", "parallelism", *COMMON_CONFIG_FIELDS_AFTER, ) @@ -194,12 +204,35 @@ class Config(src.SourcegraphClientConfig, src.LoggingConfig, src.OpenTelemetryCo cli_flag="--maps-path", metavar="FILE", help=( - "Maps YAML file for the set command\n" - "(default: ./src-auth-perms-sync-runs//maps.yaml)\n" + "Maps YAML file for the get and set commands\n" + "(default: //maps.yaml)\n" "Relative paths are resolved from the current working directory" ), help_group="Permission sync", ) + artifacts_dir: Path | None = src.config_field( + default=None, + env_var="SRC_AUTH_PERMS_SYNC_ARTIFACTS_DIR", + cli_flag="--artifacts-dir", + metavar="DIR", + help=( + "Directory containing per-endpoint artifact directories\n" + "(default: ./src-auth-perms-sync-runs)\n" + "Relative paths are resolved from the current working directory" + ), + help_group="Artifacts", + ) + no_files: bool = src.config_field( + default=False, + env_var="SRC_AUTH_PERMS_SYNC_NO_FILES", + cli_flag="--no-files", + cli_action="store_true", + help=( + "Write nothing to disk: no generated YAML, snapshots, or log file\n" + "With --apply, also requires --no-backup (explicitly giving up restore)" + ), + help_group="Artifacts", + ) restore_path: Path | None = src.config_field( default=None, env_var="SRC_AUTH_PERMS_SYNC_RESTORE_PATH", @@ -409,6 +442,14 @@ def validate_command_options(command_name: CommandName, config: Config) -> None: config_error("restore requires --restore-path") if config.restore_path is not None and command_name != "restore": config_error("--restore-path requires the restore command") + if config.maps_path is not None and command_name not in {"get", "set"}: + config_error("--maps-path requires the get or set command") + if config.no_files and config.apply and not config.no_backup: + config_error( + "--no-files with --apply also requires --no-backup: without files there " + "are no before/after snapshots, so say explicitly that you are giving " + "up the restore path" + ) def validate_user_filter_selection(command_name: CommandName, config: Config) -> None: @@ -631,18 +672,6 @@ def load_cli(argv: Sequence[str] | None = None) -> CliInput: return CliInput(command_name=command_name, config=config) -def default_maps_path(endpoint: str) -> Path: - """Return the generated maps path for a Sourcegraph endpoint.""" - return backups.endpoint_artifacts_directory(endpoint) / DEFAULT_MAPS_FILE_NAME - - -def config_with_default_paths(command_name: CommandName, config: Config, endpoint: str) -> Config: - """Return config with omitted file paths filled from generated defaults.""" - if command_name != "set" or config.maps_path is not None: - return config - return config.model_copy(update={"maps_path": default_maps_path(endpoint)}) - - def require_set_input_file(maps_path: Path) -> None: """Exit with a clear error if the selected maps file is missing.""" if maps_path.is_file(): @@ -666,7 +695,12 @@ def require_restore_input_file(restore_path: Path) -> None: raise SystemExit(f"restore snapshot file does not exist: {restore_path}") -def run_fields(config: Config, command: ResolvedCommand, endpoint: str) -> dict[str, object]: +def run_fields( + config: Config, + command: ResolvedCommand, + endpoint: str, + run_paths: backups.RunPaths, +) -> dict[str, object]: """Return run-level fields for structured logging.""" fields: dict[str, object] = { "endpoint": endpoint, @@ -677,14 +711,15 @@ def run_fields(config: Config, command: ResolvedCommand, endpoint: str) -> dict[ "max_attempts": config.max_attempts, "http_timeout_seconds": config.http_timeout_seconds, "sample_interval": config.sample_interval, - "artifacts_dir": str(backups.endpoint_artifacts_directory(endpoint)), - "python_version": sys.version.split()[0], - "pid": os.getpid(), + "artifacts_dir": str(run_paths.endpoint_directory), + "run_directory": str(run_paths.run_directory), } if command.name != "get": fields["apply"] = config.apply if config.no_backup: fields["no_backup"] = True + if config.no_files: + fields["no_files"] = True if command.set_mode is not None: fields["set_mode"] = command.set_mode if command.sync_saml_organizations: @@ -704,8 +739,9 @@ def run_with_client( config: Config, command: ResolvedCommand, endpoint: str, + run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, -) -> None: +) -> run_context.CommandData: """Create a client, run the selected command, and always close HTTP resources.""" http = src.HTTPClient( timeout=config.http_timeout_seconds, @@ -720,7 +756,7 @@ def run_with_client( fetch_sg_traces=config.fetch_sg_traces, ) try: - run_command(config, command, client, worker_pool) + return run_command(config, command, client, run_paths, worker_pool) finally: client.http.close() @@ -729,26 +765,31 @@ def run_command( config: Config, command: ResolvedCommand, client: src.SourcegraphClient, + run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, -) -> None: +) -> run_context.CommandData: """Dispatch the selected command.""" sourcegraph_site_config = site_config.validate_site_config(client) command_data = run_context.CommandData() if command.name == "get": - command_data = run_get(config, client, sourcegraph_site_config, worker_pool) + command_data = run_get(config, client, sourcegraph_site_config, run_paths, worker_pool) elif command.name == "set": - command_data = run_set(config, command, client, sourcegraph_site_config, worker_pool) + command_data = run_set( + config, command, client, sourcegraph_site_config, run_paths, worker_pool + ) elif command.name == "restore": - run_restore(config, client, sourcegraph_site_config, worker_pool) + run_restore(config, client, sourcegraph_site_config, run_paths, worker_pool) + return command_data else: run_sync_saml_organizations( config, client, sourcegraph_site_config, command_data, + run_paths, worker_pool, ) - return + return command_data if command.sync_saml_organizations: run_sync_saml_organizations( @@ -756,8 +797,10 @@ def run_command( client, sourcegraph_site_config, command_data, + run_paths, worker_pool, ) + return command_data def run_set( @@ -765,17 +808,15 @@ def run_set( command: ResolvedCommand, client: src.SourcegraphClient, sourcegraph_site_config: site_config.SiteConfig, + run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, ) -> run_context.CommandData: """Run the selected repo-permission sync command.""" assert command.set_options is not None - maps_path = config.maps_path - if maps_path is None: - raise SystemExit("set requires a maps file path") - require_set_input_file(maps_path) + require_set_input_file(run_paths.maps_path) return permissions_command.cmd_set( client, - maps_path, + run_paths, command.set_options, dry_run=not config.apply, parallelism=config.parallelism, @@ -794,6 +835,7 @@ def run_restore( config: Config, client: src.SourcegraphClient, sourcegraph_site_config: site_config.SiteConfig, + run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, ) -> None: """Run the selected repo-permission restore command.""" @@ -802,6 +844,7 @@ def run_restore( permissions_command.cmd_restore( client, config.restore_path, + run_paths, dry_run=not config.apply, parallelism=config.parallelism, explicit_permissions_batch_size=config.explicit_permissions_batch_size, @@ -816,11 +859,13 @@ def run_sync_saml_organizations( client: src.SourcegraphClient, sourcegraph_site_config: site_config.SiteConfig, command_data: run_context.CommandData, + run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, ) -> None: """Run the selected SAML organization sync command.""" organizations_command.cmd_sync_saml_organizations( client, + run_paths, dry_run=not config.apply, parallelism=config.parallelism, saml_groups_attribute_name_by_config_id=( @@ -836,22 +881,23 @@ def run_get( config: Config, client: src.SourcegraphClient, sourcegraph_site_config: site_config.SiteConfig, + run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, ) -> run_context.CommandData: """Run the default read-only discovery command.""" - artifacts_directory = backups.endpoint_artifacts_directory(client.endpoint) - maps_path = default_maps_path(client.endpoint) - maps_created = permissions_maps.create_maps_yaml_if_missing(maps_path) - if maps_created: - log.info("maps.yaml missing, created %s with an empty maps list.", maps_path) + maps_created = False + if run_paths.write_files: + maps_created = permissions_maps.create_maps_yaml_if_missing(run_paths.maps_path) + if maps_created: + log.info("maps.yaml missing, created %s with an empty maps list.", run_paths.maps_path) + else: + log.info("Left existing %s unchanged.", run_paths.maps_path) else: - log.info("Left existing %s unchanged.", maps_path) + log.info("Skipping maps.yaml creation because --no-files is set.") - return permissions_command.cmd_get( + command_data = permissions_command.cmd_get( client, - artifacts_directory / "code-hosts.yaml", - artifacts_directory / "auth-providers.yaml", - maps_path, + run_paths, user_identifiers=config.users, users_without_explicit_perms=config.users_without_explicit_perms, user_created_after=config.created_after, @@ -869,6 +915,7 @@ def run_get( retain_saml_group_users=False, worker_pool=worker_pool, ) + return dataclasses.replace(command_data, maps_created=maps_created) def reraise_system_exit_with_logged_error(exception: SystemExit) -> NoReturn: @@ -879,83 +926,188 @@ def reraise_system_exit_with_logged_error(exception: SystemExit) -> NoReturn: raise exception -def Get(config: Config) -> bool: - """Run repository permission discovery and return whether it succeeded.""" - return _run("get", config) +@dataclass(frozen=True) +class CommandResult: + """Outcome of one module-mode command run.""" + + succeeded: bool + paths: backups.RunPaths | None = None + + def __bool__(self) -> bool: + return self.succeeded + + +@dataclass(frozen=True) +class GetResult: + """Outcome of one discovery run, carrying the discovered data in memory. + + `auth_providers` and `code_hosts` hold the same dicts written to + `auth-providers.yaml` and `code-hosts.yaml`, so module callers can + assemble mapping rules without re-parsing files. + """ + + succeeded: bool + paths: backups.RunPaths | None = None + auth_providers: tuple[dict[str, Any], ...] = () + code_hosts: tuple[dict[str, Any], ...] = () + maps_created: bool = False + + def __bool__(self) -> bool: + return self.succeeded + + +def Get(config: Config, *, event_sink: src.EventSink | None = None) -> GetResult: + """Run repository permission discovery; returns data and paths in memory.""" + succeeded, command_data, run_paths = _run("get", config, event_sink) + if not succeeded or command_data is None: + return GetResult(succeeded=succeeded, paths=run_paths) + return GetResult( + succeeded=True, + paths=run_paths, + auth_providers=tuple(command_data.auth_provider_views or ()), + code_hosts=tuple(command_data.code_host_views or ()), + maps_created=command_data.maps_created, + ) -def Set(config: Config) -> bool: - """Run repository permission reconciliation and return whether it succeeded.""" - return _run("set", config) +def Set(config: Config, *, event_sink: src.EventSink | None = None) -> CommandResult: + """Run repository permission reconciliation.""" + succeeded, _, run_paths = _run("set", config, event_sink) + return CommandResult(succeeded=succeeded, paths=run_paths) -def Restore(config: Config) -> bool: - """Run repository permission restore and return whether it succeeded.""" - return _run("restore", config) +def Restore(config: Config, *, event_sink: src.EventSink | None = None) -> CommandResult: + """Run repository permission restore.""" + succeeded, _, run_paths = _run("restore", config, event_sink) + return CommandResult(succeeded=succeeded, paths=run_paths) -def SyncSamlOrgs(config: Config) -> bool: - """Run SAML organization sync and return whether it succeeded.""" - return _run("sync_saml_orgs", config) +def SyncSamlOrgs(config: Config, *, event_sink: src.EventSink | None = None) -> CommandResult: + """Run SAML organization sync.""" + succeeded, _, run_paths = _run("sync_saml_orgs", config, event_sink) + return CommandResult(succeeded=succeeded, paths=run_paths) -def _run(command_name: CommandName, config: Config) -> bool: - """Run a command and return whether it completed successfully.""" +def _run( + command_name: CommandName, + config: Config, + event_sink: src.EventSink | None, +) -> tuple[bool, run_context.CommandData | None, backups.RunPaths | None]: + """Run a module-mode command, reporting success instead of raising.""" try: - _run_or_raise(command_name, config) + command_data, run_paths = _run_or_raise(command_name, config, event_sink=event_sink) except SystemExit as exception: - return exception.code in (None, 0) + return exception.code in (None, 0), None, None except Exception: log.exception("src-auth-perms-sync run failed.") - return False - return True + return False, None, None + return True, command_data, run_paths -def _run_or_raise(command_name: CommandName, config: Config) -> None: - """Run src-auth-perms-sync, preserving CLI-style exceptions.""" +def _package_version() -> str: + try: + return importlib.metadata.version("src-auth-perms-sync") + except importlib.metadata.PackageNotFoundError: + return "unknown" + + +def _module_event_sink( + stack: contextlib.ExitStack, + run_paths: backups.RunPaths, + event_sink: src.EventSink | None, +) -> src.EventSink | None: + """Compose the module-mode sink: JSONL run log plus optional caller sink.""" + sinks: list[src.EventSink] = [] + if run_paths.write_files: + sinks.append(stack.enter_context(src.JSONLEventSink(run_paths.log_path))) + if event_sink is not None: + sinks.append(event_sink) + if not sinks: + return None + if len(sinks) == 1: + return sinks[0] + return src.CompositeEventSink(tuple(sinks)) + + +def _run_or_raise( + command_name: CommandName, + config: Config, + *, + cli_mode: bool = False, + event_sink: src.EventSink | None = None, +) -> tuple[run_context.CommandData, backups.RunPaths]: + """Run src-auth-perms-sync, preserving CLI-style exceptions. + + CLI mode installs terminal and event-bridge handlers on this package's + loggers; module mode never touches stdlib logging handlers, so the host + application's logging configuration stays in charge. + """ validate_config(command_name, config) command = resolve_command(command_name, config) try: endpoint = src.normalize_sourcegraph_endpoint(config.src_endpoint) except ValueError as error: config_error(str(error)) - config = config_with_default_paths(command_name, config, endpoint) - run_timestamp = backups.backup_timestamp() - run_directory = backups.artifact_run_directory( - run_timestamp, - endpoint, - command.artifact_name, + run_paths = backups.resolve_run_paths( + endpoint=endpoint, + command_artifact_name=command.artifact_name, + artifacts_dir=config.artifacts_dir, + maps_path=config.maps_path, + write_files=not config.no_files, ) - - logging_settings = src.logging_settings_from_config( + fields = run_fields(config, command, endpoint, run_paths) + resource = { + "service.name": "src-auth-perms-sync", + "service.version": _package_version(), + } + open_telemetry_settings = src.open_telemetry_settings_from_config( config, - log_file=backups.run_log_path(run_directory), - logs_dir=None, - resource_sample_interval_seconds=config.sample_interval, - open_telemetry=src.open_telemetry_settings_from_config( - config, - force_traces=config.fetch_sg_traces, - service_name="src-auth-perms-sync", - ), + force_traces=config.fetch_sg_traces, + service_name="src-auth-perms-sync", ) - with ( - backups.run_artifacts_context(run_directory, run_timestamp), - src.logging( - config, - command=command.name, - git_cwd=__file__, - logging_config=logging_settings, - run_fields=run_fields(config, command, endpoint), - ), - run_context.thread_pool(config.parallelism) as worker_pool, - ): + with contextlib.ExitStack() as stack: + if cli_mode: + logging_settings = src.logging_settings_from_config( + config, + logger_names=("src_auth_perms_sync", "src_py_lib"), + log_file=run_paths.log_path if run_paths.write_files else None, + logs_dir=None, + resource_sample_interval_seconds=config.sample_interval, + open_telemetry=open_telemetry_settings, + ) + stack.enter_context( + src.logging( + config, + command=command.name, + git_cwd=__file__, + logging_config=logging_settings, + run_fields=fields, + resource=resource, + ) + ) + else: + stack.enter_context( + src.observability_context( + command.name, + config, + sink=_module_event_sink(stack, run_paths, event_sink), + git_cwd=__file__, + run_fields=fields, + resource=resource, + open_telemetry=open_telemetry_settings, + resource_sample_interval_seconds=config.sample_interval, + log_file=run_paths.log_path if run_paths.write_files else None, + ) + ) + worker_pool = stack.enter_context(run_context.thread_pool(config.parallelism)) try: - run_with_client(config, command, endpoint, worker_pool) + command_data = run_with_client(config, command, endpoint, run_paths, worker_pool) except SystemExit as exception: reraise_system_exit_with_logged_error(exception) + return command_data, run_paths def main() -> None: cli_input = load_cli() - _run_or_raise(cli_input.command_name, cli_input.config) + _run_or_raise(cli_input.command_name, cli_input.config, cli_mode=True) diff --git a/src/src_auth_perms_sync/orgs/sync.py b/src/src_auth_perms_sync/orgs/sync.py index e52a466..ce435b7 100644 --- a/src/src_auth_perms_sync/orgs/sync.py +++ b/src/src_auth_perms_sync/orgs/sync.py @@ -134,15 +134,13 @@ def _log_organization_sync_plan(sync_state: _OrganizationSyncState) -> None: def _write_organization_snapshot_pair( - timestamp: str, - endpoint: str, - command_name: str, + run_paths: backups.RunPaths, before_snapshot: organization_types.OrganizationSnapshot, after_snapshot: organization_types.OrganizationSnapshot, ) -> tuple[Path, Path, Path]: - before_path = _organization_snapshot_path(timestamp, endpoint, command_name, "before") - after_path = _organization_snapshot_path(timestamp, endpoint, command_name, "after") - diff_path = _organization_snapshot_path(timestamp, endpoint, command_name, "diff") + before_path = _organization_snapshot_path(run_paths, "before") + after_path = _organization_snapshot_path(run_paths, "after") + diff_path = _organization_snapshot_path(run_paths, "diff") _write_organization_snapshot(before_path, before_snapshot) _write_organization_snapshot(after_path, after_snapshot) _write_organization_snapshot_diff(diff_path, before_snapshot, after_snapshot) @@ -150,16 +148,17 @@ def _write_organization_snapshot_pair( def _finish_organization_dry_run( - endpoint: str, - timestamp: str, + run_paths: backups.RunPaths, sync_state: _OrganizationSyncState, do_backup: bool, ) -> None: - if do_backup: + if not do_backup: + log.info("Skipped dry-run org snapshots because --no-backup was set.") + elif not run_paths.write_files: + log.info("Skipping dry-run org snapshots because --no-files is set.") + else: before_path, after_path, diff_path = _write_organization_snapshot_pair( - timestamp, - endpoint, - "sync-saml-orgs-dry-run", + run_paths, sync_state.before_snapshot, sync_state.expected_snapshot, ) @@ -169,22 +168,17 @@ def _finish_organization_dry_run( after_path, diff_path, ) - else: - log.info("Skipped dry-run org snapshots because --no-backup was set.") log.info("Dry run complete. Pass --apply to mutate organization membership.") def _write_organization_apply_before_snapshot( - endpoint: str, - timestamp: str, + run_paths: backups.RunPaths, before_snapshot: organization_types.OrganizationSnapshot, -) -> Path: - before_path = _organization_snapshot_path( - timestamp, - endpoint, - "sync-saml-orgs-apply", - "before", - ) +) -> Path | None: + if not run_paths.write_files: + log.info("Skipping before org snapshot because --no-files is set.") + return None + before_path = _organization_snapshot_path(run_paths, "before") _write_organization_snapshot(before_path, before_snapshot) log.info("Wrote before org snapshot: %s.", before_path) return before_path @@ -261,9 +255,9 @@ def _apply_organization_sync( def _finish_organization_apply_with_backup( client: src.SourcegraphClient, - timestamp: str, + run_paths: backups.RunPaths, sync_state: _OrganizationSyncState, - before_path: Path, + before_path: Path | None, parallelism: int, worker_pool: ThreadPoolExecutor | None = None, ) -> None: @@ -280,25 +274,27 @@ def _finish_organization_apply_with_backup( current_user_after["username"], ) after_snapshot = _snapshot_from_states(client.endpoint, sync_state.targets, after_states) - after_path, diff_path = _write_organization_after_snapshot( - timestamp, - client.endpoint, - sync_state.before_snapshot, - after_snapshot, - ) - log.info("Wrote after org snapshot: %s diff=%s.", after_path, diff_path) + if run_paths.write_files: + after_path, diff_path = _write_organization_after_snapshot( + run_paths, + sync_state.before_snapshot, + after_snapshot, + ) + log.info("Wrote after org snapshot: %s diff=%s.", after_path, diff_path) + else: + log.info("Skipping after and diff org snapshots because --no-files is set.") _validate_organization_sync(after_snapshot, sync_state.expected_snapshot) - log.info("To inspect the pre-sync org membership state, read:\n %s", before_path) + if before_path is not None: + log.info("To inspect the pre-sync org membership state, read:\n %s", before_path) def _write_organization_after_snapshot( - timestamp: str, - endpoint: str, + run_paths: backups.RunPaths, before_snapshot: organization_types.OrganizationSnapshot, after_snapshot: organization_types.OrganizationSnapshot, ) -> tuple[Path, Path]: - after_path = _organization_snapshot_path(timestamp, endpoint, "sync-saml-orgs-apply", "after") - diff_path = _organization_snapshot_path(timestamp, endpoint, "sync-saml-orgs-apply", "diff") + after_path = _organization_snapshot_path(run_paths, "after") + diff_path = _organization_snapshot_path(run_paths, "diff") _write_organization_snapshot(after_path, after_snapshot) _write_organization_snapshot_diff(diff_path, before_snapshot, after_snapshot) return after_path, diff_path @@ -320,6 +316,7 @@ def _raise_for_failed_organization_sync(result: _OrganizationApplyResult) -> Non def cmd_sync_saml_organizations( client: src.SourcegraphClient, + run_paths: backups.RunPaths, *, dry_run: bool, parallelism: int, @@ -354,16 +351,14 @@ def cmd_sync_saml_organizations( _log_organization_sync_plan(sync_state) - timestamp = backups.backup_timestamp() if dry_run: - _finish_organization_dry_run(client.endpoint, timestamp, sync_state, do_backup) + _finish_organization_dry_run(run_paths, sync_state, do_backup) return before_path: Path | None = None if do_backup: before_path = _write_organization_apply_before_snapshot( - client.endpoint, - timestamp, + run_paths, sync_state.before_snapshot, ) @@ -377,10 +372,9 @@ def cmd_sync_saml_organizations( _record_organization_apply_event(command_event, apply_result) if do_backup: - assert before_path is not None _finish_organization_apply_with_backup( client, - timestamp, + run_paths, sync_state, before_path, parallelism, @@ -1174,13 +1168,8 @@ def _write_organization_snapshot_diff( disk_event["bytes"] = len(contents) -def _organization_snapshot_path( - timestamp: str, - endpoint: str, - command: str, - state: str, -) -> Path: - return backups.backup_path("saml-organizations", timestamp, endpoint, command, state) +def _organization_snapshot_path(run_paths: backups.RunPaths, state: str) -> Path: + return run_paths.artifact_path(state, family="saml-organizations") def _chunks(values: list[str], size: int) -> list[list[str]]: diff --git a/src/src_auth_perms_sync/permissions/command.py b/src/src_auth_perms_sync/permissions/command.py index 6711437..5605277 100644 --- a/src/src_auth_perms_sync/permissions/command.py +++ b/src/src_auth_perms_sync/permissions/command.py @@ -31,7 +31,6 @@ load_repository_candidates_by_names, load_repository_candidates_created_on_or_after, parse_cli_date, - snapshot_path, sourcegraph_datetime_filter, user_ids_created_on_or_after, write_maps_backup, @@ -214,9 +213,7 @@ def _filter_get_snapshot_to_repositories_without_explicit_perms( def cmd_get( client: src.SourcegraphClient, - code_hosts_path: Path, - auth_providers_path: Path, - maps_path: Path, + run_paths: backups.RunPaths, *, user_identifiers: tuple[str, ...], users_without_explicit_perms: bool, @@ -235,9 +232,9 @@ def cmd_get( ) -> run_context.CommandData: """Refresh the generated discovery YAML files. - `code_hosts_path` receives Sourcegraph code host connection configs, - `auth_providers_path` receives auth provider configs, and `maps_path` - is used for the generated get-snapshot name. + `run_paths.code_hosts_path` receives Sourcegraph code host connection + configs, `run_paths.auth_providers_path` receives auth provider configs, + and `run_paths.maps_path` names the maps file recorded in the snapshot. `saml_groups_attribute_name_by_config_id` is the per-`configID` override map produced by `validate_site_config`; non-default @@ -322,12 +319,15 @@ def cmd_get( for provider in raw_providers ] - permissions_maps.dump_code_hosts_yaml(code_hosts_path, services) - permissions_maps.dump_auth_providers_yaml(auth_providers_path, providers) - cmd_event["code_hosts_path"] = str(code_hosts_path) - cmd_event["auth_providers_path"] = str(auth_providers_path) - cmd_event["maps_path"] = str(maps_path) - log.info("Wrote %s and %s", code_hosts_path, auth_providers_path) + if run_paths.write_files: + permissions_maps.dump_code_hosts_yaml(run_paths.code_hosts_path, services) + permissions_maps.dump_auth_providers_yaml(run_paths.auth_providers_path, providers) + log.info("Wrote %s and %s", run_paths.code_hosts_path, run_paths.auth_providers_path) + else: + log.info("Skipping code-hosts.yaml and auth-providers.yaml because --no-files is set.") + cmd_event["code_hosts_path"] = str(run_paths.code_hosts_path) + cmd_event["auth_providers_path"] = str(run_paths.auth_providers_path) + cmd_event["maps_path"] = str(run_paths.maps_path) if do_backup: selected_repository_ids = _load_get_repository_filter_ids( @@ -335,13 +335,12 @@ def cmd_get( repository_names=repository_names, repository_created_after=repository_created_after, ) - timestamp = backups.backup_timestamp() before_snapshot = permission_snapshot.build_snapshot( client, users, parallelism, bind_id_mode, - maps_path, + run_paths.maps_path, expected_user_count=len(users), explicit_permissions_batch_size=explicit_permissions_batch_size, worker_pool=worker_pool, @@ -352,18 +351,22 @@ def cmd_get( client, before_snapshot, ) - before_path = snapshot_path(maps_path, timestamp, client.endpoint, "get", "before") - permission_snapshot.write_snapshot(before_path, before_snapshot) - cmd_event["before_snapshot_path"] = str(before_path) - maps_backup_path = write_maps_backup(maps_path, timestamp, client.endpoint, "get") - if maps_backup_path is not None: - cmd_event["maps_backup_path"] = str(maps_backup_path) - log.info( - "Wrote before-snapshot: %s (%d repo(s) with explicit grants, %d total grant(s)).", - before_path, - before_snapshot["stats"]["repos_with_explicit_grants"], - before_snapshot["stats"]["total_grants"], - ) + if run_paths.write_files: + before_path = run_paths.artifact_path("before") + permission_snapshot.write_snapshot(before_path, before_snapshot) + cmd_event["before_snapshot_path"] = str(before_path) + maps_backup_path = write_maps_backup(run_paths.maps_path, run_paths) + if maps_backup_path is not None: + cmd_event["maps_backup_path"] = str(maps_backup_path) + log.info( + "Wrote before-snapshot: %s " + "(%d repo(s) with explicit grants, %d total grant(s)).", + before_path, + before_snapshot["stats"]["repos_with_explicit_grants"], + before_snapshot["stats"]["total_grants"], + ) + else: + log.info("Skipping get before-snapshot and maps backup because --no-files is set.") else: log.info("Skipping get before-snapshot and maps backup because --no-backup is set.") saml_group_users = ( @@ -386,6 +389,8 @@ def cmd_get( return run_context.CommandData( auth_providers=raw_providers, saml_group_users=saml_group_users, + auth_provider_views=providers, + code_host_views=services, ) @@ -578,8 +583,9 @@ def _log_user_planning_progress( def cmd_set( client: src.SourcegraphClient, - input_path: Path, - options: permission_types.SetCommandOptions, + run_paths: backups.RunPaths, + set_options: permission_types.SetCommandOptions, + *, dry_run: bool, parallelism: int, explicit_permissions_batch_size: int, @@ -590,10 +596,11 @@ def cmd_set( worker_pool: ThreadPoolExecutor | None = None, ) -> run_context.CommandData: """Dispatch the selected set mode.""" + options = set_options if options.mode == "full": return permissions_full_set.cmd_set_full( client, - input_path, + run_paths, options.user_created_after, repository_names=(), repositories_without_explicit_perms=False, @@ -611,7 +618,7 @@ def cmd_set( assert options.repository_names return permissions_full_set.cmd_set_full( client, - input_path, + run_paths, None, repository_names=options.repository_names, repositories_without_explicit_perms=False, @@ -628,7 +635,7 @@ def cmd_set( if options.mode == "repos_without_explicit_perms": return permissions_full_set.cmd_set_full( client, - input_path, + run_paths, None, repository_names=(), repositories_without_explicit_perms=True, @@ -646,7 +653,7 @@ def cmd_set( assert options.repository_created_after is not None return permissions_full_set.cmd_set_full( client, - input_path, + run_paths, None, repository_names=(), repositories_without_explicit_perms=False, @@ -664,7 +671,7 @@ def cmd_set( assert options.user_identifiers return cmd_set_additive_users( client, - input_path, + run_paths, options.user_identifiers, options.user_created_after, dry_run, @@ -677,7 +684,7 @@ def cmd_set( if options.mode == "users_without_explicit_perms": return cmd_set_additive_users_without_explicit_perms( client, - input_path, + run_paths, options.user_created_after, dry_run, parallelism, @@ -691,7 +698,7 @@ def cmd_set( assert options.user_created_after is not None return cmd_set_additive_created_after( client, - input_path, + run_paths, options.user_created_after, dry_run, parallelism, @@ -705,7 +712,7 @@ def cmd_set( def cmd_set_additive_users( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, user_identifiers: tuple[str, ...], user_created_after: str | None, dry_run: bool, @@ -718,16 +725,16 @@ def cmd_set_additive_users( """Add missing mapped permissions for resolved users.""" with src.span( "cmd_set_additive_users", - input_path=str(input_path), + input_path=str(run_paths.maps_path), user_identifiers=user_identifiers, user_created_after=user_created_after, dry_run=dry_run, parallelism=parallelism, do_backup=do_backup, ): - mapping_rules = load_mapping_rules(input_path) + mapping_rules = load_mapping_rules(run_paths.maps_path) if not mapping_rules: - log.warning("No maps defined in %s — nothing to do.", input_path) + log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) return run_context.CommandData() include_user_emails = permissions_mapping.mapping_rules_need_user_emails(mapping_rules) include_user_account_data = permissions_mapping.mapping_rules_need_saml_account_data( @@ -769,14 +776,13 @@ def cmd_set_additive_users( if not matching_rules: _run_additive_apply( client, - input_path, + run_paths, users, [], dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users", worker_pool=worker_pool, ) return run_context.CommandData(auth_providers=context.providers) @@ -814,14 +820,13 @@ def cmd_set_additive_users( ) _run_additive_apply( client, - input_path, + run_paths, users, additions, dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users", existing_repos_by_user_id=existing_repos_by_user_id, worker_pool=worker_pool, ) @@ -830,7 +835,7 @@ def cmd_set_additive_users( def cmd_set_additive_users_without_explicit_perms( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, user_created_after: str | None, dry_run: bool, parallelism: int, @@ -848,15 +853,15 @@ def cmd_set_additive_users_without_explicit_perms( ) with src.span( "cmd_set_additive_users_without_explicit_perms", - input_path=str(input_path), + input_path=str(run_paths.maps_path), user_created_after=user_created_after, dry_run=dry_run, parallelism=parallelism, do_backup=do_backup, ): - mapping_rules = load_mapping_rules(input_path) + mapping_rules = load_mapping_rules(run_paths.maps_path) if not mapping_rules: - log.warning("No maps defined in %s — nothing to do.", input_path) + log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) return run_context.CommandData() context = load_mapping_context_discovery( client, @@ -919,14 +924,13 @@ def cmd_set_additive_users_without_explicit_perms( if not users: _run_additive_apply( client, - input_path, + run_paths, users, [], dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users-without-explicit-perms", worker_pool=worker_pool, ) return run_context.CommandData(auth_providers=context.providers) @@ -940,14 +944,13 @@ def cmd_set_additive_users_without_explicit_perms( if not matching_rules: _run_additive_apply( client, - input_path, + run_paths, users, [], dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users-without-explicit-perms", worker_pool=worker_pool, ) return run_context.CommandData(auth_providers=context.providers) @@ -994,14 +997,13 @@ def cmd_set_additive_users_without_explicit_perms( ) _run_additive_apply( client, - input_path, + run_paths, users, additions, dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users-without-explicit-perms", worker_pool=worker_pool, ) return run_context.CommandData(auth_providers=context.providers) @@ -1009,7 +1011,7 @@ def cmd_set_additive_users_without_explicit_perms( def cmd_set_additive_created_after( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, user_created_after: str, dry_run: bool, parallelism: int, @@ -1024,15 +1026,15 @@ def cmd_set_additive_created_after( ) with src.span( "cmd_set_additive_created_after", - input_path=str(input_path), + input_path=str(run_paths.maps_path), user_created_after=user_created_after, dry_run=dry_run, parallelism=parallelism, do_backup=do_backup, ): - mapping_rules = load_mapping_rules(input_path) + mapping_rules = load_mapping_rules(run_paths.maps_path) if not mapping_rules: - log.warning("No maps defined in %s — nothing to do.", input_path) + log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) return run_context.CommandData() context = load_mapping_context_discovery( client, @@ -1065,14 +1067,13 @@ def cmd_set_additive_created_after( if not users: _run_additive_apply( client, - input_path, + run_paths, users, [], dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users-created-after", worker_pool=worker_pool, ) return run_context.CommandData(auth_providers=context.providers) @@ -1086,14 +1087,13 @@ def cmd_set_additive_created_after( if not matching_rules: _run_additive_apply( client, - input_path, + run_paths, users, [], dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users-created-after", worker_pool=worker_pool, ) return run_context.CommandData(auth_providers=context.providers) @@ -1131,14 +1131,13 @@ def cmd_set_additive_created_after( ) _run_additive_apply( client, - input_path, + run_paths, users, additions, dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, do_backup=do_backup, - command_name="set-add-users-created-after", existing_repos_by_user_id=existing_repos_by_user_id, worker_pool=worker_pool, ) @@ -1275,21 +1274,15 @@ def _plan_additions_for_user( return additions -def _additive_run_label(command_name: str, dry_run: bool) -> str: - return f"{command_name}-dry-run" if dry_run else f"{command_name}-apply" - - def _write_additive_initial_artifacts( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, snapshot_users: list[permission_snapshot.SnapshotUser], additions: list[permissions_apply.PermissionAddition], - timestamp: str, *, dry_run: bool, parallelism: int, bind_id_mode: str, - command_name: str, existing_repos_by_user_id: dict[str, list[permission_types.Repository]] | None = None, worker_pool: ThreadPoolExecutor | None = None, ) -> permission_snapshot.UserScopedSnapshot: @@ -1300,7 +1293,7 @@ def _write_additive_initial_artifacts( snapshot_users, parallelism, bind_id_mode, - input_path, + run_paths.maps_path, worker_pool=worker_pool, ) else: @@ -1309,33 +1302,30 @@ def _write_additive_initial_artifacts( snapshot_users, existing_repos_by_user_id, bind_id_mode, - input_path, + run_paths.maps_path, ) - run_label = _additive_run_label(command_name, dry_run) - before_path = snapshot_path(input_path, timestamp, client.endpoint, run_label, "before") - after_path = snapshot_path(input_path, timestamp, client.endpoint, run_label, "after") - permission_snapshot.write_user_scoped_snapshot(before_path, before_snapshot) after_planned_snapshot = _user_scoped_snapshot_with_additions( before_snapshot, additions, ) - diff_path: Path | None = None - if dry_run or not additions: - permission_snapshot.write_user_scoped_snapshot(after_path, after_planned_snapshot) - diff_path = write_user_scoped_snapshot_diff_file( - input_path, - timestamp, - client.endpoint, - run_label, - before_snapshot, - after_planned_snapshot, - ) - maps_backup_path = write_maps_backup(input_path, timestamp, client.endpoint, run_label) - log.info("Wrote scoped before-snapshot: %s", before_path) - if dry_run or not additions: - log.info("Wrote scoped after-snapshot: %s diff=%s", after_path, diff_path) - if maps_backup_path is not None: - log.info("Wrote maps backup for additive run: %s", maps_backup_path) + if run_paths.write_files: + before_path = run_paths.artifact_path("before") + permission_snapshot.write_user_scoped_snapshot(before_path, before_snapshot) + log.info("Wrote scoped before-snapshot: %s", before_path) + if dry_run or not additions: + after_path = run_paths.artifact_path("after") + permission_snapshot.write_user_scoped_snapshot(after_path, after_planned_snapshot) + diff_path = write_user_scoped_snapshot_diff_file( + run_paths, + before_snapshot, + after_planned_snapshot, + ) + log.info("Wrote scoped after-snapshot: %s diff=%s", after_path, diff_path) + maps_backup_path = write_maps_backup(run_paths.maps_path, run_paths) + if maps_backup_path is not None: + log.info("Wrote maps backup for additive run: %s", maps_backup_path) + else: + log.info("Skipping additive snapshot and maps backup files because --no-files is set.") log.info( "Diff (before → planned after):\n%s", permission_snapshot.render_user_scoped_diff(before_snapshot, after_planned_snapshot), @@ -1397,15 +1387,13 @@ def _apply_additive_permissions( def _finish_additive_apply_with_backup( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, snapshot_users: list[permission_snapshot.SnapshotUser], before_snapshot: permission_snapshot.UserScopedSnapshot, additions: list[permissions_apply.PermissionAddition], - timestamp: str, *, parallelism: int, bind_id_mode: str, - command_name: str, worker_pool: ThreadPoolExecutor | None = None, ) -> None: """Capture and validate additive post-apply state.""" @@ -1414,26 +1402,20 @@ def _finish_additive_apply_with_backup( snapshot_users, parallelism, bind_id_mode, - input_path, + run_paths.maps_path, worker_pool=worker_pool, ) - after_path = snapshot_path( - input_path, - timestamp, - client.endpoint, - f"{command_name}-apply", - "after", - ) - permission_snapshot.write_user_scoped_snapshot(after_path, after_snapshot) - diff_path = write_user_scoped_snapshot_diff_file( - input_path, - timestamp, - client.endpoint, - f"{command_name}-apply", - before_snapshot, - after_snapshot, - ) - log.info("Wrote scoped after-snapshot: %s diff=%s", after_path, diff_path) + if run_paths.write_files: + after_path = run_paths.artifact_path("after") + permission_snapshot.write_user_scoped_snapshot(after_path, after_snapshot) + diff_path = write_user_scoped_snapshot_diff_file( + run_paths, + before_snapshot, + after_snapshot, + ) + log.info("Wrote scoped after-snapshot: %s diff=%s", after_path, diff_path) + else: + log.info("Skipping scoped after-snapshot files because --no-files is set.") log.info( "Diff (before → after):\n%s", permission_snapshot.render_user_scoped_diff(before_snapshot, after_snapshot), @@ -1454,7 +1436,7 @@ def _raise_for_failed_additive(mutations: shared_types.MutationCounts) -> None: def _run_additive_apply( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, users: list[shared_types.User], additions: list[permissions_apply.PermissionAddition], *, @@ -1462,7 +1444,6 @@ def _run_additive_apply( parallelism: int, bind_id_mode: str, do_backup: bool, - command_name: str, existing_repos_by_user_id: dict[str, list[permission_types.Repository]] | None = None, worker_pool: ThreadPoolExecutor | None = None, ) -> None: @@ -1473,19 +1454,15 @@ def _run_additive_apply( snapshot_users = _snapshot_users_from_users(users) before_snapshot: permission_snapshot.UserScopedSnapshot | None = None - timestamp: str | None = None if do_backup: - timestamp = backups.backup_timestamp() before_snapshot = _write_additive_initial_artifacts( client, - input_path, + run_paths, snapshot_users, additions, - timestamp, dry_run=dry_run, parallelism=parallelism, bind_id_mode=bind_id_mode, - command_name=command_name, existing_repos_by_user_id=existing_repos_by_user_id, worker_pool=worker_pool, ) @@ -1503,17 +1480,14 @@ def _run_additive_apply( if do_backup: assert before_snapshot is not None - assert timestamp is not None _finish_additive_apply_with_backup( client, - input_path, + run_paths, snapshot_users, before_snapshot, additions, - timestamp, parallelism=parallelism, bind_id_mode=bind_id_mode, - command_name=command_name, worker_pool=worker_pool, ) @@ -1615,7 +1589,9 @@ def _validate_additive_after( def cmd_restore_user_scoped( client: src.SourcegraphClient, - snapshot_path: Path, + restore_path: Path, + run_paths: backups.RunPaths, + *, dry_run: bool, parallelism: int, bind_id_mode: str, @@ -1625,18 +1601,21 @@ def cmd_restore_user_scoped( """Restore explicit permissions for the users present in a scoped snapshot.""" permissions_restore.cmd_restore_user_scoped( client, - snapshot_path, - dry_run, - parallelism, - bind_id_mode, - do_backup, + restore_path, + run_paths, + dry_run=dry_run, + parallelism=parallelism, + bind_id_mode=bind_id_mode, + do_backup=do_backup, worker_pool=worker_pool, ) def cmd_restore( client: src.SourcegraphClient, - snapshot_path: Path, + restore_path: Path, + run_paths: backups.RunPaths, + *, dry_run: bool, parallelism: int, explicit_permissions_batch_size: int, @@ -1647,11 +1626,12 @@ def cmd_restore( """Restore explicit-permissions state on the instance to match a snapshot.""" permissions_restore.cmd_restore( client, - snapshot_path, - dry_run, - parallelism, - explicit_permissions_batch_size, - bind_id_mode, - do_backup, - worker_pool, + restore_path, + run_paths, + dry_run=dry_run, + parallelism=parallelism, + explicit_permissions_batch_size=explicit_permissions_batch_size, + bind_id_mode=bind_id_mode, + do_backup=do_backup, + worker_pool=worker_pool, ) diff --git a/src/src_auth_perms_sync/permissions/full_set.py b/src/src_auth_perms_sync/permissions/full_set.py index dcc4661..2c69fad 100644 --- a/src/src_auth_perms_sync/permissions/full_set.py +++ b/src/src_auth_perms_sync/permissions/full_set.py @@ -25,8 +25,8 @@ load_repository_candidates_by_names, load_repository_candidates_created_on_or_after, mapping_context_with_repository_candidates, + projected_snapshot_shell, render_projected_snapshot_diff, - snapshot_path, user_ids_created_on_or_after, validate_post_apply, write_maps_backup, @@ -45,7 +45,6 @@ class _FullSetUserState: users: list[shared_types.User] before_snapshot: permission_snapshot.Snapshot | None = None - before_timestamp: str | None = None @dataclass(frozen=True) @@ -54,7 +53,6 @@ class _FullSetSnapshotState: users: list[permission_snapshot.SnapshotUser] before_snapshot: permission_snapshot.Snapshot | None = None - before_timestamp: str | None = None selected_repository_ids: set[str] | None = None @@ -93,10 +91,6 @@ class _FullSetApplyResult: full_short_circuit: bool -def _set_full_command_name(dry_run: bool) -> str: - return "set-dry-run" if dry_run else "set-apply" - - def _capture_full_set_snapshot_state( client: src.SourcegraphClient, input_path: Path, @@ -116,7 +110,6 @@ def _capture_full_set_snapshot_state( expected_user_count, client.endpoint, ) - before_timestamp = backups.backup_timestamp() before_snapshot = permission_snapshot.build_snapshot( client, shared_sourcegraph.list_users_streaming( @@ -143,7 +136,6 @@ def _capture_full_set_snapshot_state( return _FullSetUserState( users=users, before_snapshot=before_snapshot, - before_timestamp=before_timestamp, ) @@ -260,7 +252,6 @@ def _filter_full_set_user_state_snapshot( snapshot_state.before_snapshot, selected_repository_ids, ), - before_timestamp=snapshot_state.before_timestamp, ) @@ -273,42 +264,17 @@ def _compact_full_set_snapshot_state( return _FullSetSnapshotState( users=permission_snapshot.compact_snapshot_users(users), before_snapshot=snapshot_state.before_snapshot, - before_timestamp=snapshot_state.before_timestamp, selected_repository_ids=selected_repository_ids, ) def _require_before_snapshot( snapshot_state: _FullSetUserState | _FullSetSnapshotState, -) -> tuple[permission_snapshot.Snapshot, str]: +) -> permission_snapshot.Snapshot: assert snapshot_state.before_snapshot is not None, ( "snapshot writes require a prefetched before snapshot" ) - assert snapshot_state.before_timestamp is not None, ( - "snapshot writes require a prefetched before timestamp" - ) - return snapshot_state.before_snapshot, snapshot_state.before_timestamp - - -def _write_full_set_snapshot_pair( - input_path: Path, - timestamp: str, - endpoint: str, - command_name: str, - before_snapshot: permission_snapshot.Snapshot, - after_snapshot: permission_snapshot.Snapshot, -) -> tuple[Path, Path, Path, Path | None]: - """Write before/after/diff snapshots and the companion maps backup.""" - before_path, after_path, diff_path = write_snapshot_pair( - input_path, - timestamp, - endpoint, - command_name, - before_snapshot, - after_snapshot, - ) - maps_backup_path = write_maps_backup(input_path, timestamp, endpoint, command_name) - return before_path, after_path, diff_path, maps_backup_path + return snapshot_state.before_snapshot def _recordmaps_backup_path(command_event: dict[str, Any], maps_backup_path: Path | None) -> None: @@ -316,34 +282,6 @@ def _recordmaps_backup_path(command_event: dict[str, Any], maps_backup_path: Pat command_event["maps_backup_path"] = str(maps_backup_path) -def _write_noop_full_set_snapshots( - input_path: Path, - timestamp: str, - endpoint: str, - command_name: str, - before_snapshot: permission_snapshot.Snapshot, - dry_run: bool, -) -> tuple[Path, Path, Path, Path | None]: - """Write identical before/after snapshots for a no-op full-set run.""" - before_path, after_path, diff_path, maps_backup_path = _write_full_set_snapshot_pair( - input_path, - timestamp, - endpoint, - command_name, - before_snapshot, - before_snapshot, - ) - run_mode = "dry-run" if dry_run else "apply" - log.info( - "Wrote %s snapshots: before=%s after=%s diff=%s.", - run_mode, - before_path, - after_path, - diff_path, - ) - return before_path, after_path, diff_path, maps_backup_path - - def plan_full_set_permissions( context: permission_types.MappingContext, users: list[shared_types.User], @@ -434,38 +372,37 @@ def _full_set_username_overwrites( def _finish_full_set_dry_run( - input_path: Path, - endpoint: str, + run_paths: backups.RunPaths, snapshot_state: _FullSetSnapshotState, plan: _FullSetPlan, command_event: dict[str, Any], ) -> None: """Write dry-run artifacts and log the planned mutations.""" - before_snapshot, timestamp = _require_before_snapshot(snapshot_state) - before_path = snapshot_path(input_path, timestamp, endpoint, "set-dry-run", "before") - after_path = snapshot_path(input_path, timestamp, endpoint, "set-dry-run", "after") - after_snapshot = write_projected_snapshot( - after_path, - before_snapshot, - plan.expected_users, - plan.repo_names, - ) - diff_path = write_projected_snapshot_diff_file( - input_path, - timestamp, - endpoint, - "set-dry-run", - before_snapshot, - after_snapshot, - plan.expected_users, - plan.repo_names, - ) - log.info( - "Wrote dry-run snapshots: before=%s after=%s diff=%s.", - before_path, - after_path, - diff_path, - ) + before_snapshot = _require_before_snapshot(snapshot_state) + if run_paths.write_files: + after_path = run_paths.artifact_path("after") + after_snapshot = write_projected_snapshot( + after_path, + before_snapshot, + plan.expected_users, + plan.repo_names, + ) + diff_path = write_projected_snapshot_diff_file( + run_paths, + before_snapshot, + after_snapshot, + plan.expected_users, + plan.repo_names, + ) + log.info( + "Wrote dry-run snapshots: before=%s after=%s diff=%s.", + run_paths.artifact_path("before"), + after_path, + diff_path, + ) + else: + after_snapshot = projected_snapshot_shell(before_snapshot, plan.expected_users) + log.info("Skipping dry-run snapshot files because --no-files is set.") log.info( "Diff (before → dry-run after):\n%s", render_projected_snapshot_diff( @@ -572,17 +509,17 @@ def _overwrites_with_preserved_pending( def _write_full_set_before_snapshot( - input_path: Path, - timestamp: str, - endpoint: str, - command_name: str, + run_paths: backups.RunPaths, before_snapshot: permission_snapshot.Snapshot, command_event: dict[str, Any], ) -> Path: """Persist the before-snapshot and maps backup before planning mutations.""" - before_path = snapshot_path(input_path, timestamp, endpoint, command_name, "before") + before_path = run_paths.artifact_path("before") + if not run_paths.write_files: + log.info("Skipping before-snapshot and maps backup files because --no-files is set.") + return before_path permission_snapshot.write_snapshot(before_path, before_snapshot) - maps_backup_path = write_maps_backup(input_path, timestamp, endpoint, command_name) + maps_backup_path = write_maps_backup(run_paths.maps_path, run_paths) _recordmaps_backup_path(command_event, maps_backup_path) log.info( "Wrote before-snapshot: %s (%d repo(s) with explicit grants, %d total grant(s)).", @@ -655,8 +592,7 @@ def _record_full_set_event_fields( def _finish_full_set_apply_with_backup( client: src.SourcegraphClient, - input_path: Path, - timestamp: str, + run_paths: backups.RunPaths, before_path: Path, before_snapshot: permission_snapshot.Snapshot, snapshot_state: _FullSetSnapshotState, @@ -677,30 +613,27 @@ def _finish_full_set_apply_with_backup( snapshot_state.users, parallelism, bind_id_mode, - input_path, + run_paths.maps_path, expected_user_count=len(snapshot_state.users), explicit_permissions_batch_size=explicit_permissions_batch_size, worker_pool=worker_pool, selected_repository_ids=snapshot_state.selected_repository_ids, ) - after_path = snapshot_path(input_path, timestamp, client.endpoint, "set-apply", "after") - permission_snapshot.write_snapshot(after_path, after_snapshot) - diff_path = write_snapshot_diff_file( - input_path, - timestamp, - client.endpoint, - "set-apply", - before_snapshot, - after_snapshot, - ) - log.info( - "Wrote after-snapshot: %s diff=%s (%d repo(s) with explicit grants, %d total grant(s)).", - after_path, - diff_path, - after_snapshot["stats"]["repos_with_explicit_grants"], - after_snapshot["stats"]["total_grants"], - ) + if run_paths.write_files: + after_path = run_paths.artifact_path("after") + permission_snapshot.write_snapshot(after_path, after_snapshot) + diff_path = write_snapshot_diff_file(run_paths, before_snapshot, after_snapshot) + log.info( + "Wrote after-snapshot: %s diff=%s " + "(%d repo(s) with explicit grants, %d total grant(s)).", + after_path, + diff_path, + after_snapshot["stats"]["repos_with_explicit_grants"], + after_snapshot["stats"]["total_grants"], + ) + else: + log.info("Skipping after-snapshot and diff files because --no-files is set.") log.info( "Diff (before → after):\n%s", permission_snapshot.render_snapshot_diff(before_snapshot, after_snapshot), @@ -712,12 +645,13 @@ def _finish_full_set_apply_with_backup( set(plan.expected_users), expected_pending_users=before_snapshot["pending_users"], ) - log.info( - "To roll back the explicit-permissions state captured in " - "the before-snapshot, run:\n" - " uv run src-auth-perms-sync restore --restore-path %s --apply", - before_path, - ) + if run_paths.write_files: + log.info( + "To roll back the explicit-permissions state captured in " + "the before-snapshot, run:\n" + " uv run src-auth-perms-sync restore --restore-path %s --apply", + before_path, + ) def _raise_for_failed_full_set_apply( @@ -740,30 +674,35 @@ def _raise_for_failed_full_set_apply( def _write_noop_full_set_artifacts( - input_path: Path, - endpoint: str, - command_name: str, + run_paths: backups.RunPaths, snapshot_state: _FullSetUserState | _FullSetSnapshotState, dry_run: bool, command_event: dict[str, Any], ) -> None: """Write no-op before/after snapshots for an empty full-set run.""" - before_snapshot, timestamp = _require_before_snapshot(snapshot_state) - *_, maps_backup_path = _write_noop_full_set_snapshots( - input_path, - timestamp, - endpoint, - command_name, + before_snapshot = _require_before_snapshot(snapshot_state) + if not run_paths.write_files: + log.info("Skipping no-op snapshot files because --no-files is set.") + return + before_path, after_path, diff_path = write_snapshot_pair( + run_paths, + before_snapshot, before_snapshot, - dry_run, ) + maps_backup_path = write_maps_backup(run_paths.maps_path, run_paths) _recordmaps_backup_path(command_event, maps_backup_path) + log.info( + "Wrote %s snapshots: before=%s after=%s diff=%s.", + "dry-run" if dry_run else "apply", + before_path, + after_path, + diff_path, + ) def _finish_empty_full_set_mapping_rules( client: src.SourcegraphClient, - input_path: Path, - command_name: str, + run_paths: backups.RunPaths, dry_run: bool, repository_names: tuple[str, ...], repositories_without_explicit_perms: bool, @@ -775,7 +714,7 @@ def _finish_empty_full_set_mapping_rules( command_event: dict[str, Any], worker_pool: ThreadPoolExecutor | None = None, ) -> None: - log.warning("No maps defined in %s — nothing to do.", input_path) + log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) if not (dry_run or do_backup): return @@ -791,7 +730,7 @@ def _finish_empty_full_set_mapping_rules( ) snapshot_state = _capture_full_set_snapshot_state( client, - input_path, + run_paths.maps_path, parallelism, explicit_permissions_batch_size, bind_id_mode, @@ -800,7 +739,7 @@ def _finish_empty_full_set_mapping_rules( selected_repository_ids=selected_repository_ids, ) if repositories_without_explicit_perms: - before_snapshot, _ = _require_before_snapshot(snapshot_state) + before_snapshot = _require_before_snapshot(snapshot_state) selected_repository_candidates = _load_repositories_without_explicit_permissions( client, before_snapshot, @@ -810,9 +749,7 @@ def _finish_empty_full_set_mapping_rules( _repository_ids(selected_repository_candidates), ) _write_noop_full_set_artifacts( - input_path, - client.endpoint, - command_name, + run_paths, snapshot_state, dry_run, command_event, @@ -821,7 +758,7 @@ def _finish_empty_full_set_mapping_rules( def _load_full_set_plan( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, mapping_rules: list[permission_types.MappingRule], user_created_after: str | None, repository_names: tuple[str, ...], @@ -833,7 +770,6 @@ def _load_full_set_plan( saml_groups_attribute_name_by_config_id: dict[str, str], capture_before: bool, write_before_snapshot: bool, - command_name: str, command_event: dict[str, Any], retain_saml_group_users: bool, worker_pool: ThreadPoolExecutor | None = None, @@ -855,7 +791,7 @@ def _load_full_set_plan( ) user_state = _load_full_set_snapshot_state( client, - input_path, + run_paths.maps_path, parallelism, explicit_permissions_batch_size, bind_id_mode, @@ -866,7 +802,7 @@ def _load_full_set_plan( selected_repository_ids=selected_repository_ids, ) if repositories_without_explicit_perms: - before_snapshot, _ = _require_before_snapshot(user_state) + before_snapshot = _require_before_snapshot(user_state) selected_repository_candidates = _load_repositories_without_explicit_permissions( client, before_snapshot, @@ -879,12 +815,9 @@ def _load_full_set_plan( before_path: Path | None = None if write_before_snapshot: - before_snapshot, before_timestamp = _require_before_snapshot(user_state) + before_snapshot = _require_before_snapshot(user_state) before_path = _write_full_set_before_snapshot( - input_path, - before_timestamp, - client.endpoint, - command_name, + run_paths, before_snapshot, command_event, ) @@ -940,9 +873,7 @@ def _load_full_set_plan( def _finish_empty_full_set_plan( - input_path: Path, - endpoint: str, - command_name: str, + run_paths: backups.RunPaths, snapshot_state: _FullSetSnapshotState, dry_run: bool, do_backup: bool, @@ -951,9 +882,7 @@ def _finish_empty_full_set_plan( log.warning("No repos resolved across any mapping — nothing to do.") if dry_run or do_backup: _write_noop_full_set_artifacts( - input_path, - endpoint, - command_name, + run_paths, snapshot_state, dry_run, command_event, @@ -962,7 +891,7 @@ def _finish_empty_full_set_plan( def _run_full_set_apply( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, snapshot_state: _FullSetSnapshotState, plan: _FullSetPlan, mapping_count: int, @@ -982,10 +911,8 @@ def _run_full_set_apply( ) before_snapshot: permission_snapshot.Snapshot | None = None if do_backup: - before_snapshot, before_timestamp = _require_before_snapshot(snapshot_state) + before_snapshot = _require_before_snapshot(snapshot_state) assert before_path is not None - else: - before_timestamp = backups.backup_timestamp() # The before-snapshot's pending grants are already scoped to any repo # selection; without one (--no-backup), fetch the live pending state so @@ -1011,8 +938,7 @@ def _run_full_set_apply( assert before_path is not None and before_snapshot is not None _finish_full_set_apply_with_backup( client, - input_path, - before_timestamp, + run_paths, before_path, before_snapshot, snapshot_state, @@ -1029,7 +955,7 @@ def _run_full_set_apply( def cmd_set_full( client: src.SourcegraphClient, - input_path: Path, + run_paths: backups.RunPaths, user_created_after: str | None, repository_names: tuple[str, ...], repositories_without_explicit_perms: bool, @@ -1046,7 +972,7 @@ def cmd_set_full( """Overwrite each mapped repo with the union of users from all rules.""" with src.span( "cmd_set", - input_path=str(input_path), + input_path=str(run_paths.maps_path), user_created_after=user_created_after, repository_names=repository_names or None, repositories_without_explicit_perms=(True if repositories_without_explicit_perms else None), @@ -1055,13 +981,11 @@ def cmd_set_full( parallelism=parallelism, do_backup=do_backup, ) as command_event: - mapping_rules = load_mapping_rules(input_path) - command_name = _set_full_command_name(dry_run) + mapping_rules = load_mapping_rules(run_paths.maps_path) if not mapping_rules: _finish_empty_full_set_mapping_rules( client, - input_path, - command_name, + run_paths, dry_run, repository_names, repositories_without_explicit_perms, @@ -1077,7 +1001,7 @@ def cmd_set_full( loaded_plan = _load_full_set_plan( client, - input_path, + run_paths, mapping_rules, user_created_after, repository_names, @@ -1089,7 +1013,6 @@ def cmd_set_full( saml_groups_attribute_name_by_config_id, capture_before=dry_run or do_backup or repositories_without_explicit_perms, write_before_snapshot=dry_run or do_backup, - command_name=command_name, command_event=command_event, retain_saml_group_users=retain_saml_group_users, worker_pool=worker_pool, @@ -1098,9 +1021,7 @@ def cmd_set_full( plan = loaded_plan.plan if not plan.expected_users: _finish_empty_full_set_plan( - input_path, - client.endpoint, - command_name, + run_paths, snapshot_state, dry_run, do_backup, @@ -1110,8 +1031,7 @@ def cmd_set_full( if dry_run: _finish_full_set_dry_run( - input_path, - client.endpoint, + run_paths, snapshot_state, plan, command_event, @@ -1120,7 +1040,7 @@ def cmd_set_full( _run_full_set_apply( client, - input_path, + run_paths, snapshot_state, plan, len(mapping_rules), diff --git a/src/src_auth_perms_sync/permissions/restore.py b/src/src_auth_perms_sync/permissions/restore.py index 52de8dc..5ff7405 100644 --- a/src/src_auth_perms_sync/permissions/restore.py +++ b/src/src_auth_perms_sync/permissions/restore.py @@ -16,9 +16,6 @@ from . import apply as permissions_apply from . import snapshot as permission_snapshot from . import types as permission_types -from .workflow import ( - snapshot_path as snapshot_artifact_path, -) from .workflow import ( write_snapshot_diff_file, write_snapshot_pair, @@ -75,6 +72,8 @@ class _UserScopedRestoreMutationResult: def cmd_restore_user_scoped( client: src.SourcegraphClient, snapshot_path: Path, + run_paths: backups.RunPaths, + *, dry_run: bool, parallelism: int, bind_id_mode: str, @@ -112,14 +111,9 @@ def cmd_restore_user_scoped( ) _log_user_scoped_restore_plan(snapshot_state, plan) - timestamp = backups.backup_timestamp() - command_name = _user_scoped_restore_command_name(dry_run) if dry_run or do_backup: _write_user_scoped_restore_initial_artifacts( - client, - snapshot_path, - timestamp, - command_name, + run_paths, snapshot_state.current_snapshot, snapshot_state.target_snapshot, dry_run, @@ -130,10 +124,7 @@ def cmd_restore_user_scoped( return if not plan.additions and not plan.removals: _finish_empty_user_scoped_restore_plan( - client, - snapshot_path, - timestamp, - command_name, + run_paths, snapshot_state.current_snapshot, do_backup, ) @@ -145,8 +136,7 @@ def cmd_restore_user_scoped( _finish_user_scoped_restore_apply_with_backup( client, snapshot_path, - timestamp, - command_name, + run_paths, snapshot_state, parallelism, bind_id_mode, @@ -206,10 +196,6 @@ def _plan_user_scoped_restore( return _UserScopedRestorePlan(additions=additions, removals=removals) -def _user_scoped_restore_command_name(dry_run: bool) -> str: - return "restore-scoped-dry-run" if dry_run else "restore-scoped-apply" - - def _validate_user_scoped_restore_snapshot_context( client: src.SourcegraphClient, target_snapshot: permission_snapshot.UserScopedSnapshot, @@ -277,40 +263,25 @@ def _log_user_scoped_restore_plan( def _write_user_scoped_restore_initial_artifacts( - client: src.SourcegraphClient, - snapshot_path: Path, - timestamp: str, - command_name: str, + run_paths: backups.RunPaths, current_snapshot: permission_snapshot.UserScopedSnapshot, target_snapshot: permission_snapshot.UserScopedSnapshot, dry_run: bool, ) -> None: """Write before-snapshot and optional dry-run target artifacts.""" - before_restore_path = snapshot_artifact_path( - snapshot_path, - timestamp, - client.endpoint, - command_name, - "before", - ) - after_restore_path = snapshot_artifact_path( - snapshot_path, - timestamp, - client.endpoint, - command_name, - "after", - ) + if not run_paths.write_files: + log.info("Skipping scoped restore snapshot files because --no-files is set.") + return + before_restore_path = run_paths.artifact_path("before") permission_snapshot.write_user_scoped_snapshot(before_restore_path, current_snapshot) log.info("Wrote scoped restore before-snapshot: %s", before_restore_path) if not dry_run: return + after_restore_path = run_paths.artifact_path("after") permission_snapshot.write_user_scoped_snapshot(after_restore_path, target_snapshot) diff_path = write_user_scoped_snapshot_diff_file( - snapshot_path, - timestamp, - client.endpoint, - command_name, + run_paths, current_snapshot, target_snapshot, ) @@ -318,30 +289,21 @@ def _write_user_scoped_restore_initial_artifacts( def _finish_empty_user_scoped_restore_plan( - client: src.SourcegraphClient, - snapshot_path: Path, - timestamp: str, - command_name: str, + run_paths: backups.RunPaths, current_snapshot: permission_snapshot.UserScopedSnapshot, do_backup: bool, ) -> None: log.info("Scoped restore target already matches current state — nothing to apply.") if not do_backup: return + if not run_paths.write_files: + log.info("Skipping scoped restore snapshot files because --no-files is set.") + return - after_restore_path = snapshot_artifact_path( - snapshot_path, - timestamp, - client.endpoint, - command_name, - "after", - ) + after_restore_path = run_paths.artifact_path("after") permission_snapshot.write_user_scoped_snapshot(after_restore_path, current_snapshot) diff_path = write_user_scoped_snapshot_diff_file( - snapshot_path, - timestamp, - client.endpoint, - command_name, + run_paths, current_snapshot, current_snapshot, ) @@ -419,8 +381,7 @@ def _apply_user_scoped_restore( def _finish_user_scoped_restore_apply_with_backup( client: src.SourcegraphClient, snapshot_path: Path, - timestamp: str, - command_name: str, + run_paths: backups.RunPaths, snapshot_state: _UserScopedRestoreState, parallelism: int, bind_id_mode: str, @@ -435,23 +396,17 @@ def _finish_user_scoped_restore_apply_with_backup( snapshot_path, worker_pool=worker_pool, ) - after_restore_path = snapshot_artifact_path( - snapshot_path, - timestamp, - client.endpoint, - command_name, - "after", - ) - permission_snapshot.write_user_scoped_snapshot(after_restore_path, after_restore_snapshot) - diff_path = write_user_scoped_snapshot_diff_file( - snapshot_path, - timestamp, - client.endpoint, - command_name, - snapshot_state.current_snapshot, - after_restore_snapshot, - ) - log.info("Wrote scoped restore after-snapshot: %s diff=%s", after_restore_path, diff_path) + if run_paths.write_files: + after_restore_path = run_paths.artifact_path("after") + permission_snapshot.write_user_scoped_snapshot(after_restore_path, after_restore_snapshot) + diff_path = write_user_scoped_snapshot_diff_file( + run_paths, + snapshot_state.current_snapshot, + after_restore_snapshot, + ) + log.info("Wrote scoped restore after-snapshot: %s diff=%s", after_restore_path, diff_path) + else: + log.info("Skipping scoped restore after-snapshot files because --no-files is set.") residual = permission_snapshot.render_user_scoped_diff( after_restore_snapshot, snapshot_state.target_snapshot, @@ -494,10 +449,6 @@ def _log_user_scoped_restore_done(mutations: _UserScopedRestoreMutationResult) - ) -def _restore_command_name(dry_run: bool) -> str: - return "restore-dry-run" if dry_run else "restore-apply" - - def _validate_restore_snapshot_context( client: src.SourcegraphClient, target_snapshot: permission_snapshot.Snapshot, @@ -644,8 +595,7 @@ def repository_name(repo_id: str) -> str: def _finish_empty_restore_plan( - client: src.SourcegraphClient, - snapshot_path: Path, + run_paths: backups.RunPaths, current_snapshot: permission_snapshot.Snapshot, dry_run: bool, do_backup: bool, @@ -654,14 +604,12 @@ def _finish_empty_restore_plan( log.info("Nothing to restore: current explicit-permissions state already matches snapshot.") if not (dry_run or do_backup): return + if not run_paths.write_files: + log.info("Skipping restore snapshot files because --no-files is set.") + return - timestamp = backups.backup_timestamp() - command_name = _restore_command_name(dry_run) before_restore_path, after_restore_path, diff_path = write_snapshot_pair( - snapshot_path, - timestamp, - client.endpoint, - command_name, + run_paths, current_snapshot, current_snapshot, ) @@ -694,43 +642,36 @@ def _log_full_restore_plan(snapshot_state: RestoreSnapshotState, plan: RestorePl def _finish_restore_dry_run( - client: src.SourcegraphClient, - snapshot_path: Path, + run_paths: backups.RunPaths, snapshot_state: RestoreSnapshotState, ) -> None: """Write dry-run restore artifacts and stop before mutation.""" - timestamp = backups.backup_timestamp() - before_restore_path, after_restore_path, diff_path = write_snapshot_pair( - snapshot_path, - timestamp, - client.endpoint, - "restore-dry-run", - snapshot_state.current_snapshot, - snapshot_state.target_snapshot, - ) - log.info( - "Wrote restore dry-run snapshots: before=%s after=%s diff=%s.", - before_restore_path, - after_restore_path, - diff_path, - ) + if run_paths.write_files: + before_restore_path, after_restore_path, diff_path = write_snapshot_pair( + run_paths, + snapshot_state.current_snapshot, + snapshot_state.target_snapshot, + ) + log.info( + "Wrote restore dry-run snapshots: before=%s after=%s diff=%s.", + before_restore_path, + after_restore_path, + diff_path, + ) + else: + log.info("Skipping restore dry-run snapshot files because --no-files is set.") log.info("Dry run complete. Pass --apply to mutate state.") def _write_restore_apply_before_snapshot( - snapshot_path: Path, - timestamp: str, - endpoint: str, + run_paths: backups.RunPaths, current_snapshot: permission_snapshot.Snapshot, -) -> Path: +) -> None: """Persist the pre-restore state so the restore is reversible.""" - before_restore_path = snapshot_artifact_path( - snapshot_path, - timestamp, - endpoint, - "restore-apply", - "before", - ) + if not run_paths.write_files: + log.info("Skipping pre-restore snapshot because --no-files is set.") + return + before_restore_path = run_paths.artifact_path("before") permission_snapshot.write_snapshot(before_restore_path, current_snapshot) log.info( "Wrote pre-restore snapshot: %s (%d repo(s) with explicit grants, %d total grant(s)).", @@ -738,7 +679,6 @@ def _write_restore_apply_before_snapshot( current_snapshot["stats"]["repos_with_explicit_grants"], current_snapshot["stats"]["total_grants"], ) - return before_restore_path def _apply_restore_overwrites( @@ -789,7 +729,7 @@ def _record_restore_event_fields( def _finish_restore_apply_with_backup( client: src.SourcegraphClient, snapshot_path: Path, - timestamp: str, + run_paths: backups.RunPaths, snapshot_state: RestoreSnapshotState, parallelism: int, explicit_permissions_batch_size: int, @@ -808,30 +748,24 @@ def _finish_restore_apply_with_backup( explicit_permissions_batch_size=explicit_permissions_batch_size, worker_pool=worker_pool, ) - after_restore_path = snapshot_artifact_path( - snapshot_path, - timestamp, - client.endpoint, - "restore-apply", - "after", - ) - permission_snapshot.write_snapshot(after_restore_path, after_restore_snapshot) - diff_path = write_snapshot_diff_file( - snapshot_path, - timestamp, - client.endpoint, - "restore-apply", - snapshot_state.current_snapshot, - after_restore_snapshot, - ) - log.info( - "Wrote post-restore snapshot: %s diff=%s " - "(%d repo(s) with explicit grants, %d total grant(s)).", - after_restore_path, - diff_path, - after_restore_snapshot["stats"]["repos_with_explicit_grants"], - after_restore_snapshot["stats"]["total_grants"], - ) + if run_paths.write_files: + after_restore_path = run_paths.artifact_path("after") + permission_snapshot.write_snapshot(after_restore_path, after_restore_snapshot) + diff_path = write_snapshot_diff_file( + run_paths, + snapshot_state.current_snapshot, + after_restore_snapshot, + ) + log.info( + "Wrote post-restore snapshot: %s diff=%s " + "(%d repo(s) with explicit grants, %d total grant(s)).", + after_restore_path, + diff_path, + after_restore_snapshot["stats"]["repos_with_explicit_grants"], + after_restore_snapshot["stats"]["total_grants"], + ) + else: + log.info("Skipping post-restore snapshot files because --no-files is set.") residual = permission_snapshot.render_snapshot_diff( after_restore_snapshot, snapshot_state.target_snapshot, @@ -863,6 +797,8 @@ def _raise_for_failed_restore(mutations: shared_types.MutationCounts, overwrite_ def cmd_restore( client: src.SourcegraphClient, snapshot_path: Path, + run_paths: backups.RunPaths, + *, dry_run: bool, parallelism: int, explicit_permissions_batch_size: int, @@ -876,10 +812,11 @@ def cmd_restore( cmd_restore_user_scoped( client, snapshot_path, - dry_run, - parallelism, - bind_id_mode, - do_backup, + run_paths, + dry_run=dry_run, + parallelism=parallelism, + bind_id_mode=bind_id_mode, + do_backup=do_backup, target_snapshot=cast(permission_snapshot.UserScopedSnapshot, target_snapshot), worker_pool=worker_pool, ) @@ -911,8 +848,7 @@ def cmd_restore( plan = plan_full_restore(snapshot_state) if not plan.overwrites: _finish_empty_restore_plan( - client, - snapshot_path, + run_paths, snapshot_state.current_snapshot, dry_run, do_backup, @@ -921,15 +857,12 @@ def cmd_restore( _log_full_restore_plan(snapshot_state, plan) if dry_run: - _finish_restore_dry_run(client, snapshot_path, snapshot_state) + _finish_restore_dry_run(run_paths, snapshot_state) return - timestamp = backups.backup_timestamp() if do_backup: _write_restore_apply_before_snapshot( - snapshot_path, - timestamp, - client.endpoint, + run_paths, snapshot_state.current_snapshot, ) @@ -940,7 +873,7 @@ def cmd_restore( _finish_restore_apply_with_backup( client, snapshot_path, - timestamp, + run_paths, snapshot_state, parallelism, explicit_permissions_batch_size, diff --git a/src/src_auth_perms_sync/permissions/workflow.py b/src/src_auth_perms_sync/permissions/workflow.py index 8c98ee5..58d17a1 100644 --- a/src/src_auth_perms_sync/permissions/workflow.py +++ b/src/src_auth_perms_sync/permissions/workflow.py @@ -276,53 +276,25 @@ def load_repository_candidates_created_on_or_after( return candidates -def snapshot_path( - input_path: Path, - timestamp: str, - endpoint: str, - command: str, - state: str | None = None, -) -> Path: - """Return a path inside the run's artifact directory. - - Example: maps.yaml + endpoint + timestamp + set-apply + before → - src-auth-perms-sync-runs/sourcegraph.example.com/runs/2026-04-27-01-54-23-set-apply/before.json. - """ - return backups.backup_path(input_path.name, timestamp, endpoint, command, state) - - def write_snapshot_pair( - input_path: Path, - timestamp: str, - endpoint: str, - command: str, + run_paths: backups.RunPaths, before_snapshot: permission_snapshot.Snapshot, after_snapshot: permission_snapshot.Snapshot, ) -> tuple[Path, Path, Path]: - before_path = snapshot_path(input_path, timestamp, endpoint, command, "before") - after_path = snapshot_path(input_path, timestamp, endpoint, command, "after") + before_path = run_paths.artifact_path("before") + after_path = run_paths.artifact_path("after") permission_snapshot.write_snapshot(before_path, before_snapshot) permission_snapshot.write_snapshot(after_path, after_snapshot) - diff_path = write_snapshot_diff_file( - input_path, - timestamp, - endpoint, - command, - before_snapshot, - after_snapshot, - ) + diff_path = write_snapshot_diff_file(run_paths, before_snapshot, after_snapshot) return before_path, after_path, diff_path def write_snapshot_diff_file( - input_path: Path, - timestamp: str, - endpoint: str, - command: str, + run_paths: backups.RunPaths, before_snapshot: permission_snapshot.Snapshot, after_snapshot: permission_snapshot.Snapshot, ) -> Path: - diff_path = snapshot_path(input_path, timestamp, endpoint, command, "diff") + diff_path = run_paths.artifact_path("diff") permission_snapshot.write_snapshot_diff_from_snapshots( diff_path, before_snapshot, @@ -332,14 +304,11 @@ def write_snapshot_diff_file( def write_user_scoped_snapshot_diff_file( - input_path: Path, - timestamp: str, - endpoint: str, - command: str, + run_paths: backups.RunPaths, before_snapshot: permission_snapshot.UserScopedSnapshot, after_snapshot: permission_snapshot.UserScopedSnapshot, ) -> Path: - diff_path = snapshot_path(input_path, timestamp, endpoint, command, "diff") + diff_path = run_paths.artifact_path("diff") permission_snapshot.write_user_scoped_snapshot_diff( diff_path, permission_snapshot.build_user_scoped_snapshot_diff(before_snapshot, after_snapshot), @@ -347,28 +316,15 @@ def write_user_scoped_snapshot_diff_file( return diff_path -def maps_backup_path( - input_path: Path, - timestamp: str, - endpoint: str, - command: str, -) -> Path: - """Path for the companion copy of the maps YAML used for a backup run.""" - return backups.backup_path(input_path.name, timestamp, endpoint, command, suffix="yaml") - - -def write_maps_backup( - input_path: Path, - timestamp: str, - endpoint: str, - command: str, -) -> Path | None: - """Copy the active maps YAML next to the JSON snapshots for auditability.""" +def write_maps_backup(input_path: Path, run_paths: backups.RunPaths) -> Path | None: + """Copy the run's input file next to the JSON snapshots for auditability.""" + if not run_paths.write_files: + return None if not input_path.exists(): log.warning("Could not back up maps file %s because it does not exist.", input_path) return None - output_path = maps_backup_path(input_path, timestamp, endpoint, command) + output_path = run_paths.input_copy_path(input_path.name) with src.span( "disk_io", level="DEBUG", @@ -493,17 +449,14 @@ def write_projected_snapshot( def write_projected_snapshot_diff_file( - input_path: Path, - timestamp: str, - endpoint: str, - command: str, + run_paths: backups.RunPaths, before_snapshot: permission_snapshot.Snapshot, after_snapshot: permission_snapshot.Snapshot, expected_users: dict[str, tuple[str, ...]], repo_names: dict[str, str], ) -> Path: """Write a diff for a projected full-set after snapshot.""" - diff_path = snapshot_path(input_path, timestamp, endpoint, command, "diff") + diff_path = run_paths.artifact_path("diff") repo_ids = projected_snapshot_repo_ids(before_snapshot, expected_users) permission_snapshot.write_snapshot_diff_from_snapshot_parts( diff_path, diff --git a/src/src_auth_perms_sync/shared/backups.py b/src/src_auth_perms_sync/shared/backups.py index e4a2ca7..9c6b8f0 100644 --- a/src/src_auth_perms_sync/shared/backups.py +++ b/src/src_auth_perms_sync/shared/backups.py @@ -1,84 +1,131 @@ -"""Endpoint-scoped artifact path helpers.""" +"""Endpoint-scoped artifact paths, resolved once per run into `RunPaths`.""" from __future__ import annotations import datetime import re -from collections.abc import Generator -from contextlib import contextmanager -from contextvars import ContextVar +from dataclasses import dataclass from pathlib import Path from urllib.parse import urlsplit ARTIFACTS_DIR_NAME = "src-auth-perms-sync-runs" +DEFAULT_MAPS_FILE_NAME = "maps.yaml" LOG_FILE_NAME = "log.json" RUNS_DIR_NAME = "runs" +CODE_HOSTS_FILE_NAME = "code-hosts.yaml" +AUTH_PROVIDERS_FILE_NAME = "auth-providers.yaml" -_CURRENT_RUN_ARTIFACTS_DIRECTORY: ContextVar[Path | None] = ContextVar( - "current_run_artifacts_directory", - default=None, -) -_CURRENT_RUN_TIMESTAMP: ContextVar[str | None] = ContextVar( - "current_run_timestamp", - default=None, -) +@dataclass(frozen=True) +class RunPaths: + """Every filesystem location one run may read or write, resolved once. -def backup_timestamp() -> str: - """Return a filesystem-friendly UTC timestamp.""" - run_timestamp = _CURRENT_RUN_TIMESTAMP.get() - if run_timestamp is not None: - return run_timestamp - return datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d-%H-%M-%S") + Built at the CLI/module edge by `resolve_run_paths()` and threaded + explicitly through command code; nothing below the edge recomputes paths + from cwd or global state. `write_files` is False under `--no-files`: + path values stay valid for naming and messages, but nothing is written. + """ + timestamp: str + artifacts_dir: Path + endpoint_directory: Path + maps_path: Path + code_hosts_path: Path + auth_providers_path: Path + run_directory: Path + write_files: bool = True -@contextmanager -def run_artifacts_context(run_directory: Path, timestamp: str) -> Generator[None]: - """Make backup helpers write into the current CLI run directory.""" - directory_token = _CURRENT_RUN_ARTIFACTS_DIRECTORY.set(run_directory) - timestamp_token = _CURRENT_RUN_TIMESTAMP.set(timestamp) - try: - yield - finally: - _CURRENT_RUN_TIMESTAMP.reset(timestamp_token) - _CURRENT_RUN_ARTIFACTS_DIRECTORY.reset(directory_token) + @property + def log_path(self) -> Path: + """The structured JSONL event log for this run.""" + return self.run_directory / LOG_FILE_NAME + def artifact_path(self, state: str, *, family: str | None = None, suffix: str = "json") -> Path: + """Return a run artifact path such as `before.json` or `-diff.json`.""" + name = f"{family}-{state}" if family else state + return self.run_directory / f"{safe_filename_part(name)}.{suffix}" -def artifact_run_directory(timestamp: str, endpoint: str, command: str) -> Path: - """Return the artifact directory for one command run.""" - run_directory = safe_filename_part(f"{timestamp}-{command}") - return endpoint_artifacts_directory(endpoint) / RUNS_DIR_NAME / run_directory + def input_copy_path(self, input_name: str) -> Path: + """Return the audit-copy path for an input file (e.g. the active maps YAML).""" + return self.run_directory / safe_filename_part(input_name) -def backup_path( - source_name: str, - timestamp: str, - endpoint: str, - command: str, - state: str | None = None, +def resolve_run_paths( *, - suffix: str = "json", -) -> Path: - """Return an artifact path under one directory per endpoint run.""" - backup_directory = _CURRENT_RUN_ARTIFACTS_DIRECTORY.get() or artifact_run_directory( + endpoint: str, + command_artifact_name: str, + artifacts_dir: Path | None = None, + maps_path: Path | None = None, + write_files: bool = True, + current_directory: Path | None = None, +) -> RunPaths: + """Resolve every path for one run; create the run directory exclusively. + + `artifacts_dir` is the directory containing endpoint subdirectories + (default: `./src-auth-perms-sync-runs`). Paths are resolved to absolute + once, so later working-directory changes cannot redirect writes. Run + directories are created exclusively; a same-second collision gets a + numeric suffix rather than sharing or overwriting an existing run. + """ + base_directory = current_directory or Path.cwd() + resolved_artifacts_dir = ( + artifacts_dir if artifacts_dir is not None else base_directory / ARTIFACTS_DIR_NAME + ) + if not resolved_artifacts_dir.is_absolute(): + resolved_artifacts_dir = base_directory / resolved_artifacts_dir + resolved_artifacts_dir = resolved_artifacts_dir.resolve() + endpoint_directory = resolved_artifacts_dir / endpoint_directory_name(endpoint) + + resolved_maps_path = ( + maps_path if maps_path is not None else endpoint_directory / DEFAULT_MAPS_FILE_NAME + ) + if not resolved_maps_path.is_absolute(): + resolved_maps_path = base_directory / resolved_maps_path + resolved_maps_path = resolved_maps_path.resolve() + + timestamp = run_timestamp() + run_directory = _exclusive_run_directory( + endpoint_directory / RUNS_DIR_NAME, timestamp, - endpoint, - command, + command_artifact_name, + create=write_files, + ) + return RunPaths( + timestamp=timestamp, + artifacts_dir=resolved_artifacts_dir, + endpoint_directory=endpoint_directory, + maps_path=resolved_maps_path, + code_hosts_path=endpoint_directory / CODE_HOSTS_FILE_NAME, + auth_providers_path=endpoint_directory / AUTH_PROVIDERS_FILE_NAME, + run_directory=run_directory, + write_files=write_files, ) - if state is None: - return backup_directory / safe_filename_part(source_name) - return backup_directory / f"{safe_filename_part(state)}.{suffix}" -def run_log_path(run_directory: Path) -> Path: - """Return the structured log path for a run artifact directory.""" - return run_directory / LOG_FILE_NAME +def run_timestamp() -> str: + """Return a filesystem-friendly UTC timestamp.""" + return datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d-%H-%M-%S") -def endpoint_artifacts_directory(endpoint: str, current_directory: Path | None = None) -> Path: - """Return this endpoint's artifact directory under the current working directory.""" - base_directory = current_directory or Path.cwd() - return base_directory / ARTIFACTS_DIR_NAME / endpoint_directory_name(endpoint) +def _exclusive_run_directory( + runs_directory: Path, + timestamp: str, + command_artifact_name: str, + *, + create: bool, +) -> Path: + """Create (or name) a unique run directory; never reuse an existing one.""" + base_name = safe_filename_part(f"{timestamp}-{command_artifact_name}") + candidate = runs_directory / base_name + if not create: + return candidate + for attempt in range(2, 1000): + try: + candidate.mkdir(parents=True, exist_ok=False) + return candidate + except FileExistsError: + candidate = runs_directory / f"{base_name}-{attempt}" + raise OSError(f"could not create a unique run directory under {runs_directory}") def endpoint_directory_name(endpoint: str) -> str: diff --git a/src/src_auth_perms_sync/shared/run_context.py b/src/src_auth_perms_sync/shared/run_context.py index 45369e2..1364358 100644 --- a/src/src_auth_perms_sync/shared/run_context.py +++ b/src/src_auth_perms_sync/shared/run_context.py @@ -8,7 +8,7 @@ from concurrent.futures import FIRST_COMPLETED, Future, ThreadPoolExecutor, wait from contextlib import contextmanager from dataclasses import dataclass -from typing import Generic, TypeVar, cast +from typing import Any, Generic, TypeVar, cast import src_py_lib as src @@ -21,10 +21,18 @@ @dataclass(frozen=True) class CommandData: - """Instance data a command loaded and later commands may reuse.""" + """Instance data a command loaded and later commands or callers may reuse. + + `auth_provider_views` and `code_host_views` carry the same dicts the get + command writes to `auth-providers.yaml` and `code-hosts.yaml`, so module + callers receive discovery data without re-parsing files. + """ auth_providers: list[shared_types.AuthProvider] | None = None saml_group_users: list[shared_types.SamlGroupUser] | None = None + auth_provider_views: list[dict[str, Any]] | None = None + code_host_views: list[dict[str, Any]] | None = None + maps_created: bool = False @dataclass(frozen=True) diff --git a/tests/e2e/case_runner.py b/tests/e2e/case_runner.py index 6c14f45..df35a3d 100644 --- a/tests/e2e/case_runner.py +++ b/tests/e2e/case_runner.py @@ -16,6 +16,7 @@ import json import shlex import sys +import tempfile from collections.abc import Iterator, Mapping, Sequence from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass @@ -27,6 +28,7 @@ from src_py_lib.utils.config import config_options from src_auth_perms_sync import cli +from src_auth_perms_sync.shared import backups from src_auth_perms_sync.shared import types as shared_types FIXTURES_DIR = Path(__file__).with_name("fixtures") @@ -139,6 +141,9 @@ class FixtureRunResult: command_failure: str | None = None expected_errors: tuple[str, ...] = () runner: str = "cli" # "cli" (parsed argv) or "import" (programmatic Config) + # Files written under the run's temporary artifacts directory, relative + # to it. Empty when the run wrote nothing (e.g. under no_files). + artifact_file_names: tuple[str, ...] = () @property def failure(self) -> str | None: @@ -776,7 +781,9 @@ def cli_input_for_case( return cli.CliInput(command_name=command_name, config=config) -def run_fixture_case(case_name: str, runner: str = "cli") -> FixtureRunResult: +def run_fixture_case( + case_name: str, runner: str = "cli", *, no_files: bool = False +) -> FixtureRunResult: case = load_e2e_cases()[case_name] case_dir = FIXTURES_DIR / case_name before_state = load_state(case_dir / "before.json") @@ -787,26 +794,51 @@ def run_fixture_case(case_name: str, runner: str = "cli") -> FixtureRunResult: expected_state = FakeSourcegraphClient(load_state(expected_source)).export_state() client = FakeSourcegraphClient(before_state) command_failure: str | None = None - - try: - cli_input = cli_input_for_case(case, case_name, client.endpoint, runner) - # Local runs execute in-process against the in-memory fake, where - # client parallelism buys nothing and only adds scheduling - # nondeterminism — pin it to 1 regardless of the case's command - # line. Live/performance runs use the command line as written. - local_config = cli_input.config.model_copy(update={"parallelism": 1}) - command = cli.resolve_command(cli_input.command_name, local_config) - with ThreadPoolExecutor(max_workers=local_config.parallelism) as worker_pool: - cli.run_command( - local_config, - command, - cast(src.SourcegraphClient, client), - worker_pool, + artifact_file_names: tuple[str, ...] = () + + # Route run artifacts (snapshots, maps copies, generated maps.yaml) into + # a per-case temporary directory so local test runs never pollute the + # repo's ./src-auth-perms-sync-runs tree; the directory is removed when + # the case finishes. + with tempfile.TemporaryDirectory(prefix="src-auth-perms-sync-case-") as temp_directory: + artifacts_dir = Path(temp_directory) + try: + cli_input = cli_input_for_case(case, case_name, client.endpoint, runner) + # Local runs execute in-process against the in-memory fake, where + # client parallelism buys nothing and only adds scheduling + # nondeterminism — pin it to 1 regardless of the case's command + # line. Live/performance runs use the command line as written. + config_updates: dict[str, object] = {"parallelism": 1} + if no_files: + config_updates["no_files"] = True + local_config = cli_input.config.model_copy(update=config_updates) + command = cli.resolve_command(cli_input.command_name, local_config) + run_paths = backups.resolve_run_paths( + endpoint=client.endpoint, + command_artifact_name=command.artifact_name, + artifacts_dir=artifacts_dir, + maps_path=local_config.maps_path, + write_files=not local_config.no_files, ) - except SystemExit as exception: - command_failure = f"SystemExit: {exception.code!r}" - except Exception as exception: - command_failure = f"{type(exception).__name__}: {exception}" + with ThreadPoolExecutor(max_workers=local_config.parallelism) as worker_pool: + cli.run_command( + local_config, + command, + cast(src.SourcegraphClient, client), + run_paths, + worker_pool, + ) + except SystemExit as exception: + command_failure = f"SystemExit: {exception.code!r}" + except Exception as exception: + command_failure = f"{type(exception).__name__}: {exception}" + artifact_file_names = tuple( + sorted( + str(path.relative_to(artifacts_dir)) + for path in artifacts_dir.rglob("*") + if path.is_file() + ) + ) actual_state = client.export_state() return FixtureRunResult( @@ -824,6 +856,7 @@ def run_fixture_case(case_name: str, runner: str = "cli") -> FixtureRunResult: command_failure=command_failure, expected_errors=tuple(case.get("expectedErrors", [])), runner=runner, + artifact_file_names=artifact_file_names, ) diff --git a/tests/e2e/test_local_cases.py b/tests/e2e/test_local_cases.py index b766ef0..0ee338d 100644 --- a/tests/e2e/test_local_cases.py +++ b/tests/e2e/test_local_cases.py @@ -93,6 +93,31 @@ def test_local_replay_cases(self) -> None: with self.subTest(case=case_name): self.assertEqual("", run_local_replay_case(case_name)) + def test_no_files_set_dry_run_matches_files_enabled(self) -> None: + """no_files must not change a set dry-run's decisions, and writes nothing. + + The same fixture case runs twice through the import API — once with + files enabled and once with no_files — and must plan identical + mutations and end in identical instance state, while the no_files + run's temporary artifacts directory stays completely empty. + """ + case_name = "full-overwrite-dry-run" + with_files = run_fixture_case(case_name, "import") + without_files = run_fixture_case(case_name, "import", no_files=True) + self.assertIsNone(with_files.failure) + self.assertIsNone(without_files.failure) + self.assertEqual(with_files.actual_mutations, without_files.actual_mutations) + self.assertEqual(with_files.actual_state, without_files.actual_state) + self.assertTrue( + with_files.artifact_file_names, + "the files-enabled run should write run artifacts (maps copy / snapshots)", + ) + self.assertEqual( + (), + without_files.artifact_file_names, + "no_files run must leave its artifacts directory empty", + ) + def test_local_state_cases(self) -> None: for case_name, case in load_e2e_cases().items(): if "local" not in case_modes(case) or is_replay_case(case): diff --git a/tests/integration/test_cli_entrypoint.py b/tests/integration/test_cli_entrypoint.py index 499a5fa..4ad8c42 100644 --- a/tests/integration/test_cli_entrypoint.py +++ b/tests/integration/test_cli_entrypoint.py @@ -4,6 +4,22 @@ import sys import unittest +import src_py_lib as src + +import src_auth_perms_sync +from src_auth_perms_sync import cli +from src_auth_perms_sync.shared import backups + + +class PackageImportTests(unittest.TestCase): + def test_importing_the_package_exposes_module_mode_names(self) -> None: + self.assertIs(src_auth_perms_sync.GetResult, cli.GetResult) + self.assertIs(src_auth_perms_sync.CommandResult, cli.CommandResult) + self.assertIs(src_auth_perms_sync.RunPaths, backups.RunPaths) + self.assertIs(src_auth_perms_sync.EventSink, src.EventSink) + for exported_name in ("GetResult", "CommandResult", "RunPaths", "EventSink"): + self.assertIn(exported_name, src_auth_perms_sync.__all__) + class CliEntrypointTests(unittest.TestCase): def test_module_help_prints_usage(self) -> None: @@ -52,12 +68,20 @@ def test_command_help_prints_command_specific_options(self) -> None: text=True, ) - self.assertNotIn("--apply", get_help.stdout) + # get is read-only: the --apply option must not exist (it is only + # mentioned inside the --no-files help text). + self.assertNotIn("[--apply]", get_help.stdout) + self.assertNotIn("Apply changes", get_help.stdout) self.assertIn("--no-backup", get_help.stdout) + self.assertIn("--maps-path FILE", get_help.stdout) + self.assertIn("--artifacts-dir DIR", get_help.stdout) + self.assertIn("--no-files", get_help.stdout) self.assertNotIn("--sync-saml-orgs", get_help.stdout) self.assertIn("--users USERS", get_help.stdout) self.assertNotIn("--user USER", get_help.stdout) self.assertIn("--maps-path FILE", set_help.stdout) + self.assertIn("--artifacts-dir DIR", set_help.stdout) + self.assertIn("--no-files", set_help.stdout) self.assertIn("--users USERS", set_help.stdout) self.assertIn("--sync-saml-orgs", set_help.stdout) self.assertNotIn("--restore-path", set_help.stdout) diff --git a/tests/run.py b/tests/run.py index 06a181d..12770a5 100644 --- a/tests/run.py +++ b/tests/run.py @@ -724,9 +724,9 @@ def sample_once(self) -> None: @dataclass(frozen=True) class RunLogSummary: - """Resource usage and the run end record from one CLI run's structured log.""" + """Resource usage and the run-end event attributes from one CLI run's structured log.""" - run_record: dict[str, Any] | None + run_end_attributes: dict[str, Any] | None sampled_peak_rss_mb: float | None resource_sample_count: int max_num_fds: int | None @@ -749,12 +749,20 @@ def int_field(record: dict[str, Any], name: str) -> int | None: return None +def record_attributes(record: dict[str, Any]) -> dict[str, Any]: + """Return one OTel log record's attributes mapping (empty when absent).""" + attributes = record.get("attributes") + if isinstance(attributes, dict): + return cast("dict[str, Any]", attributes) + return {} + + def read_run_log_summary(log_path: Path | None) -> RunLogSummary: - """Parse a CLI run's log.json for the run end record and resource samples.""" + """Parse a CLI run's log.json for the run-end event and resource samples.""" empty = RunLogSummary(None, None, 0, None, None, None) if log_path is None or not log_path.is_file(): return empty - run_record: dict[str, Any] | None = None + run_end_attributes: dict[str, Any] | None = None sampled_peak_rss_mb: float | None = None resource_sample_count = 0 max_num_fds: int | None = None @@ -768,30 +776,31 @@ def read_run_log_summary(log_path: Path | None) -> RunLogSummary: record = cast("dict[str, Any]", json.loads(line)) except json.JSONDecodeError: continue - if record.get("event") == "resource_sample": + attributes = record_attributes(record) + if record.get("event_name") == "resource_sample": resource_sample_count += 1 - sample_rss = float_field(record, "peak_rss_mb", "rss_mb", "process_rss_mb") + sample_rss = float_field(attributes, "peak_rss_mb", "rss_mb", "process_rss_mb") if sample_rss is not None and ( sampled_peak_rss_mb is None or sample_rss > sampled_peak_rss_mb ): sampled_peak_rss_mb = sample_rss - sample_fds = int_field(record, "num_fds") + sample_fds = int_field(attributes, "num_fds") if sample_fds is not None and (max_num_fds is None or sample_fds > max_num_fds): max_num_fds = sample_fds - sample_threads = int_field(record, "num_threads") + sample_threads = int_field(attributes, "num_threads") if sample_threads is not None and ( max_num_threads is None or sample_threads > max_num_threads ): max_num_threads = sample_threads - sample_cpu = float_field(record, "process_cpu_percent", "cpu_percent") + sample_cpu = float_field(attributes, "process_cpu_percent", "cpu_percent") if sample_cpu is not None and ( max_process_cpu_percent is None or sample_cpu > max_process_cpu_percent ): max_process_cpu_percent = sample_cpu - if record.get("event") == "run" and record.get("phase") == "end": - run_record = record + if record.get("event_name") == "run" and attributes.get("phase") == "end": + run_end_attributes = attributes return RunLogSummary( - run_record=run_record, + run_end_attributes=run_end_attributes, sampled_peak_rss_mb=sampled_peak_rss_mb, resource_sample_count=resource_sample_count, max_num_fds=max_num_fds, @@ -2727,9 +2736,9 @@ def performance_row( summary = read_run_log_summary(result.log_path) duration_ms: float | None = None peak_rss_mb: float | None = None - if summary.run_record is not None: - duration_ms = float_field(summary.run_record, "duration_ms") - peak_rss_mb = float_field(summary.run_record, "peak_rss_mb") + if summary.run_end_attributes is not None: + duration_ms = float_field(summary.run_end_attributes, "duration_ms") + peak_rss_mb = float_field(summary.run_end_attributes, "peak_rss_mb") return { "case": strip_iteration_suffix(case_name), "variant": variant_name, @@ -3046,11 +3055,17 @@ def run_set_full_in_memory( ) command = cli.resolve_command("set", config) artifacts_directory = maps_path.parent / f"artifacts-{time.monotonic_ns()}" - with ( - backups.run_artifacts_context(artifacts_directory, backups.backup_timestamp()), - ThreadPoolExecutor(max_workers=1) as worker_pool, - ): - cli.run_command(config, command, cast("src.SourcegraphClient", client), worker_pool) + run_paths = backups.resolve_run_paths( + endpoint=state["endpoint"], + command_artifact_name=command.artifact_name, + artifacts_dir=artifacts_directory, + maps_path=maps_path, + write_files=True, + ) + with ThreadPoolExecutor(max_workers=1) as worker_pool: + cli.run_command( + config, command, cast("src.SourcegraphClient", client), run_paths, worker_pool + ) return client.export_state(), client.mutation_count @@ -3373,7 +3388,7 @@ def mutations_succeeded_from_log(log_path: Path | None) -> int | None: record = cast("dict[str, Any]", json.loads(line)) except json.JSONDecodeError: continue - value = record.get("mutations_succeeded") + value = record_attributes(record).get("mutations_succeeded") if isinstance(value, int): succeeded = value return succeeded @@ -3452,12 +3467,13 @@ def graphql_trace_request_from_record(record: dict[str, Any]) -> dict[str, Any] import src_py_lib as src from src_py_lib.clients.sourcegraph import sourcegraph_trace_from_headers - if record.get("event") != "http_request" or record.get("phase") != "end": + attributes = record_attributes(record) + if record.get("event_name") != "http_request" or attributes.get("phase") != "end": return None - if not str(record.get("url", "")).endswith("/.api/graphql"): + if not str(attributes.get("url", "")).endswith("/.api/graphql"): return None - request_headers = string_headers(record.get("request_headers")) - response_headers = string_headers(record.get("response_headers")) + request_headers = string_headers(attributes.get("request_headers")) + response_headers = string_headers(attributes.get("response_headers")) trace = sourcegraph_trace_from_headers(response_headers, request_headers) if trace is None: trace_id = trace_id_from_traceparent(header_value(request_headers, "traceparent")) @@ -3468,11 +3484,11 @@ def graphql_trace_request_from_record(record: dict[str, Any]) -> dict[str, Any] trace_url=header_value(response_headers, "x-trace-url"), ) return trace.to_json() | { - "duration_ms": float_field(record, "duration_ms") or 0.0, - "timestamp": record.get("ts"), - "status": record.get("status"), - "status_code": record.get("status_code"), - "error_type": record.get("error_type"), + "duration_ms": float_field(attributes, "duration_ms") or 0.0, + "timestamp": record.get("time_unix_nano"), + "status": attributes.get("status"), + "status_code": attributes.get("status_code"), + "error_type": attributes.get("error.type"), } diff --git a/tests/unit/test_backups.py b/tests/unit/test_backups.py index 340f773..6161838 100644 --- a/tests/unit/test_backups.py +++ b/tests/unit/test_backups.py @@ -1,83 +1,297 @@ from __future__ import annotations +import tempfile import unittest from pathlib import Path +from unittest import mock from src_auth_perms_sync.shared import backups +FIXED_TIMESTAMP = "2026-06-12-00-00-00" -class BackupPathTests(unittest.TestCase): - def test_endpoint_directory_name_uses_hostname_and_port(self) -> None: + +def make_run_paths(run_directory: Path) -> backups.RunPaths: + return backups.RunPaths( + timestamp=FIXED_TIMESTAMP, + artifacts_dir=run_directory.parent, + endpoint_directory=run_directory.parent, + maps_path=run_directory.parent / "maps.yaml", + code_hosts_path=run_directory.parent / "code-hosts.yaml", + auth_providers_path=run_directory.parent / "auth-providers.yaml", + run_directory=run_directory, + ) + + +class EndpointDirectoryNameTests(unittest.TestCase): + def test_uses_hostname_and_port(self) -> None: self.assertEqual( "sourcegraph.example.com", - backups.endpoint_directory_name("https://Sourcegraph.example.com"), + backups.endpoint_directory_name("https://sourcegraph.example.com"), ) self.assertEqual( "sourcegraph.example.com-3443", backups.endpoint_directory_name("https://sourcegraph.example.com:3443"), ) - def test_endpoint_artifacts_directory_uses_current_directory(self) -> None: + def test_lowercases_hostnames(self) -> None: + self.assertEqual( + "sourcegraph.example.com", + backups.endpoint_directory_name("https://Sourcegraph.Example.COM"), + ) + + def test_accepts_scheme_less_endpoints(self) -> None: + self.assertEqual( + "sourcegraph.example.com", + backups.endpoint_directory_name("sourcegraph.example.com"), + ) self.assertEqual( - Path("/tmp/work") / backups.ARTIFACTS_DIR_NAME / "sourcegraph.example.com", - backups.endpoint_artifacts_directory( - "https://sourcegraph.example.com", Path("/tmp/work") - ), + "sourcegraph.example.com-3443", + backups.endpoint_directory_name("sourcegraph.example.com:3443/path"), + ) + + def test_replaces_unsafe_characters(self) -> None: + self.assertEqual("host_name", backups.endpoint_directory_name("Host Name")) + self.assertEqual("unknown", backups.endpoint_directory_name("///")) + + +class SafeFilenamePartTests(unittest.TestCase): + def test_falls_back_for_empty_values(self) -> None: + self.assertEqual("unknown", backups.safe_filename_part("///")) + self.assertEqual("a_b-c.d", backups.safe_filename_part("a/b-c.d")) + + def test_strips_leading_dots_so_traversal_cannot_survive(self) -> None: + self.assertEqual("etc_passwd", backups.safe_filename_part("../../etc/passwd")) + self.assertEqual("unknown", backups.safe_filename_part("..")) + + +class ResolveRunPathsTests(unittest.TestCase): + def resolve( + self, + current_directory: Path, + *, + artifacts_dir: Path | None = None, + maps_path: Path | None = None, + write_files: bool = True, + ) -> backups.RunPaths: + return backups.resolve_run_paths( + endpoint="https://sourcegraph.example.com", + command_artifact_name="get", + artifacts_dir=artifacts_dir, + maps_path=maps_path, + write_files=write_files, + current_directory=current_directory, ) - def test_backup_path_uses_safe_endpoint_source_command_and_state(self) -> None: + def test_default_artifacts_dir_is_runs_directory_under_current_directory(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + run_paths = self.resolve(directory) + + self.assertEqual( + (directory / backups.ARTIFACTS_DIR_NAME).resolve(), + run_paths.artifacts_dir, + ) self.assertEqual( - Path.cwd() - / backups.ARTIFACTS_DIR_NAME - / "sourcegraph.example.com" - / backups.RUNS_DIR_NAME - / "2026-05-23-set_users" - / "before.json", - backups.backup_path( - "repo/1", - "2026-05-23", - "https://sourcegraph.example.com", - "set:users", - "before", - ), + run_paths.artifacts_dir / "sourcegraph.example.com", + run_paths.endpoint_directory, ) - def test_backup_path_copies_source_file_with_original_name(self) -> None: + def test_relative_artifacts_dir_resolves_against_current_directory(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + run_paths = self.resolve(directory, artifacts_dir=Path("custom-artifacts")) + + self.assertEqual((directory / "custom-artifacts").resolve(), run_paths.artifacts_dir) + + def test_absolute_artifacts_dir_is_used_verbatim(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + artifacts_dir = directory / "absolute-artifacts" + run_paths = self.resolve(directory / "elsewhere", artifacts_dir=artifacts_dir) + + self.assertEqual(artifacts_dir.resolve(), run_paths.artifacts_dir) + + def test_default_maps_path_is_endpoint_directory_maps_yaml(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + run_paths = self.resolve(Path(directory_name)) + self.assertEqual( - Path.cwd() - / backups.ARTIFACTS_DIR_NAME - / "sourcegraph.example.com" - / backups.RUNS_DIR_NAME - / "2026-05-23-get" - / "maps.yaml", - backups.backup_path( - "maps.yaml", - "2026-05-23", - "https://sourcegraph.example.com", - "get", - suffix="yaml", - ), + run_paths.endpoint_directory / backups.DEFAULT_MAPS_FILE_NAME, + run_paths.maps_path, ) - def test_backup_path_uses_current_run_artifacts_context(self) -> None: - run_directory = backups.artifact_run_directory( - "2026-05-23", - "https://sourcegraph.example.com", - "get", + def test_relative_maps_path_resolves_against_current_directory(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + run_paths = self.resolve(directory, maps_path=Path("custom-maps.yaml")) + + self.assertEqual((directory / "custom-maps.yaml").resolve(), run_paths.maps_path) + + def test_absolute_maps_path_override_is_respected(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + maps_path = directory / "team-maps.yaml" + run_paths = self.resolve(directory, maps_path=maps_path) + + self.assertEqual(maps_path.resolve(), run_paths.maps_path) + + def test_generated_yaml_paths_live_in_the_endpoint_directory(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + run_paths = self.resolve(Path(directory_name)) + + self.assertEqual( + run_paths.endpoint_directory / backups.CODE_HOSTS_FILE_NAME, + run_paths.code_hosts_path, + ) + self.assertEqual( + run_paths.endpoint_directory / backups.AUTH_PROVIDERS_FILE_NAME, + run_paths.auth_providers_path, ) - with backups.run_artifacts_context(run_directory, "2026-05-23"): + def test_creates_the_run_directory_exclusively(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + with mock.patch.object(backups, "run_timestamp", return_value=FIXED_TIMESTAMP): + run_paths = self.resolve(directory) + + self.assertTrue(run_paths.run_directory.is_dir()) + self.assertEqual(f"{FIXED_TIMESTAMP}-get", run_paths.run_directory.name) + self.assertEqual( + run_paths.endpoint_directory / backups.RUNS_DIR_NAME, + run_paths.run_directory.parent, + ) + + def test_same_second_collisions_get_distinct_suffixed_directories(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + with mock.patch.object(backups, "run_timestamp", return_value=FIXED_TIMESTAMP): + first_run = self.resolve(directory) + colliding_run = self.resolve(directory) + third_run = self.resolve(directory) + + self.assertEqual(f"{FIXED_TIMESTAMP}-get", first_run.run_directory.name) + self.assertEqual(f"{FIXED_TIMESTAMP}-get-2", colliding_run.run_directory.name) + self.assertEqual(f"{FIXED_TIMESTAMP}-get-3", third_run.run_directory.name) self.assertEqual( - run_directory / "before.json", - backups.backup_path( - "ignored.yaml", - "unused-timestamp", - "https://unused.example.com", - "unused-command", - "before", + 3, + len( + { + first_run.run_directory, + colliding_run.run_directory, + third_run.run_directory, + } ), ) + self.assertTrue(colliding_run.run_directory.is_dir()) + self.assertTrue(third_run.run_directory.is_dir()) - def test_safe_filename_part_falls_back_for_empty_values(self) -> None: - self.assertEqual("unknown", backups.safe_filename_part("///")) - self.assertEqual("a_b-c.d", backups.safe_filename_part("a/b-c.d")) + def test_pre_existing_timestamp_directory_is_never_reused(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + artifacts_dir = (directory / backups.ARTIFACTS_DIR_NAME).resolve() + occupied_run_directory = ( + artifacts_dir + / "sourcegraph.example.com" + / backups.RUNS_DIR_NAME + / f"{FIXED_TIMESTAMP}-get" + ) + occupied_run_directory.mkdir(parents=True) + sentinel = occupied_run_directory / "before.json" + sentinel.write_text("{}") + + with mock.patch.object(backups, "run_timestamp", return_value=FIXED_TIMESTAMP): + run_paths = self.resolve(directory) + + self.assertEqual(f"{FIXED_TIMESTAMP}-get-2", run_paths.run_directory.name) + self.assertNotEqual(occupied_run_directory, run_paths.run_directory) + # The occupied run's artifacts stay untouched. + self.assertEqual("{}", sentinel.read_text()) + + def test_write_files_false_creates_no_directories(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + run_paths = self.resolve(directory, write_files=False) + + self.assertFalse(run_paths.write_files) + self.assertFalse(run_paths.run_directory.exists()) + self.assertEqual([], list(directory.iterdir())) + + +class RunPathsArtifactTests(unittest.TestCase): + def test_artifact_path_names_state_files(self) -> None: + run_directory = Path("/artifacts/sourcegraph.example.com/runs/run") + run_paths = make_run_paths(run_directory) + + self.assertEqual(run_directory / "before.json", run_paths.artifact_path("before")) + self.assertEqual(run_directory / "after.json", run_paths.artifact_path("after")) + self.assertEqual(run_directory / "diff.json", run_paths.artifact_path("diff")) + + def test_artifact_path_family_prefixes_keep_combined_runs_collision_free(self) -> None: + run_directory = Path("/artifacts/sourcegraph.example.com/runs/run") + run_paths = make_run_paths(run_directory) + + family_paths = { + run_paths.artifact_path(state, family="saml-organizations") + for state in ("before", "after", "diff") + } + permission_paths = {run_paths.artifact_path(state) for state in ("before", "after", "diff")} + + self.assertEqual( + run_directory / "saml-organizations-before.json", + run_paths.artifact_path("before", family="saml-organizations"), + ) + self.assertEqual(set(), family_paths & permission_paths) + + def test_artifact_path_supports_custom_suffixes(self) -> None: + run_paths = make_run_paths(Path("/artifacts/endpoint/runs/run")) + + self.assertEqual( + run_paths.run_directory / "before.yaml", + run_paths.artifact_path("before", suffix="yaml"), + ) + + def test_artifact_path_sanitizes_hostile_names_inside_the_run_directory(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + run_directory = Path(directory_name) / "runs" / "run" + run_paths = make_run_paths(run_directory) + + for hostile_name in ("../../etc/passwd", "..", "/etc/shadow", "a/../../b"): + with self.subTest(hostile_name=hostile_name): + artifact = run_paths.artifact_path(hostile_name) + self.assertEqual(run_directory, artifact.parent) + self.assertTrue( + artifact.resolve().is_relative_to(run_directory.resolve()), + f"{artifact} escapes {run_directory}", + ) + + def test_input_copy_path_keeps_the_original_file_name(self) -> None: + run_paths = make_run_paths(Path("/artifacts/endpoint/runs/run")) + + self.assertEqual( + run_paths.run_directory / "maps.yaml", + run_paths.input_copy_path("maps.yaml"), + ) + + def test_input_copy_path_sanitizes_hostile_names_inside_the_run_directory(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + run_directory = Path(directory_name) / "runs" / "run" + run_paths = make_run_paths(run_directory) + + for hostile_name in ("../../etc/passwd", "..", "/etc/shadow"): + with self.subTest(hostile_name=hostile_name): + copy_path = run_paths.input_copy_path(hostile_name) + self.assertEqual(run_directory, copy_path.parent) + self.assertTrue( + copy_path.resolve().is_relative_to(run_directory.resolve()), + f"{copy_path} escapes {run_directory}", + ) + + def test_log_path_is_log_json_in_the_run_directory(self) -> None: + run_paths = make_run_paths(Path("/artifacts/endpoint/runs/run")) + + self.assertEqual(run_paths.run_directory / backups.LOG_FILE_NAME, run_paths.log_path) + self.assertEqual("log.json", run_paths.log_path.name) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_cli_config.py b/tests/unit/test_cli_config.py index 646c472..887ff44 100644 --- a/tests/unit/test_cli_config.py +++ b/tests/unit/test_cli_config.py @@ -42,6 +42,18 @@ def load_config_from_env(**env: str) -> cli.Config: ) +def make_run_paths() -> backups.RunPaths: + return backups.RunPaths( + timestamp="2026-06-12-00-00-00", + artifacts_dir=Path("artifacts"), + endpoint_directory=Path("artifacts/sourcegraph.example.com"), + maps_path=Path("maps.yaml"), + code_hosts_path=Path("code-hosts.yaml"), + auth_providers_path=Path("auth-providers.yaml"), + run_directory=Path("artifacts/sourcegraph.example.com/runs/run"), + ) + + class CliConfigTests(unittest.TestCase): def test_resolve_command_uses_explicit_command_name(self) -> None: command = cli.resolve_command("get", make_config()) @@ -310,6 +322,113 @@ def test_validate_config_rejects_apply_with_get(self) -> None: def test_validate_config_allows_get_no_backup(self) -> None: cli.validate_config("get", make_config(no_backup=True)) + def test_validate_config_rejects_no_files_apply_without_no_backup(self) -> None: + expected_message = "--no-files with --apply also requires --no-backup" + self.assert_config_error( + "set", + make_config(full=True, no_files=True, apply=True), + expected_message, + ) + self.assert_config_error( + "restore", + make_config(restore_path=Path("snapshot.json"), no_files=True, apply=True), + expected_message, + ) + + def test_validate_config_allows_no_files_apply_with_no_backup(self) -> None: + cli.validate_config( + "set", + make_config(full=True, no_files=True, apply=True, no_backup=True), + ) + cli.validate_config( + "restore", + make_config( + restore_path=Path("snapshot.json"), + no_files=True, + apply=True, + no_backup=True, + ), + ) + + def test_validate_config_allows_no_files_without_apply(self) -> None: + cli.validate_config("get", make_config(no_files=True)) + cli.validate_config("set", make_config(full=True, no_files=True)) + cli.validate_config( + "restore", + make_config(restore_path=Path("snapshot.json"), no_files=True), + ) + cli.validate_config("sync_saml_orgs", make_config(no_files=True)) + + def test_validate_config_rejects_maps_path_outside_get_and_set(self) -> None: + expected_message = "--maps-path requires the get or set command" + self.assert_config_error( + "restore", + make_config(restore_path=Path("snapshot.json"), maps_path=Path("maps.yaml")), + expected_message, + ) + self.assert_config_error( + "sync_saml_orgs", + make_config(maps_path=Path("maps.yaml")), + expected_message, + ) + + def test_validate_config_allows_maps_path_with_get_and_set(self) -> None: + cli.validate_config("get", make_config(maps_path=Path("maps.yaml"))) + cli.validate_config("set", make_config(maps_path=Path("maps.yaml"), full=True)) + + def test_artifacts_dir_flag_is_accepted_on_every_command(self) -> None: + with ( + tempfile.TemporaryDirectory() as directory, + mock.patch.dict( + os.environ, + { + "SRC_ENDPOINT": "https://sourcegraph.example.com", + "SRC_ACCESS_TOKEN": "secret", + }, + clear=True, + ), + ): + env_file = Path(directory) / ".env" + env_file.write_text("") + snapshot_path = Path(directory) / "before.json" + snapshot_path.write_text("{}") + artifacts_dir = Path(directory) / "artifact-output" + extra_arguments_by_command = { + "get": [], + "set": ["--full"], + "restore": ["--restore-path", str(snapshot_path)], + "sync-saml-orgs": [], + } + + for command_argument, extra_arguments in extra_arguments_by_command.items(): + with self.subTest(command=command_argument): + cli_input = cli.load_cli( + [ + command_argument, + "--env-file", + str(env_file), + "--artifacts-dir", + str(artifacts_dir), + *extra_arguments, + ] + ) + self.assertEqual(artifacts_dir, cli_input.config.artifacts_dir) + + def test_get_help_lists_artifact_options(self) -> None: + captured_stdout = io.StringIO() + + with ( + contextlib.redirect_stdout(captured_stdout), + self.assertRaises(SystemExit) as exit_context, + ): + cli.load_cli(["get", "--help"]) + + self.assertEqual(0, exit_context.exception.code) + help_text = captured_stdout.getvalue() + self.assertIn("--maps-path", help_text) + self.assertIn("--artifacts-dir", help_text) + self.assertIn("--no-files", help_text) + def test_validate_config_rejects_restore_without_restore_path(self) -> None: self.assert_config_error("restore", make_config(), "restore requires --restore-path") @@ -439,6 +558,7 @@ def capture_client( _config: cli.Config, _command: cli.ResolvedCommand, client: src.SourcegraphClient, + _run_paths: backups.RunPaths, _worker_pool: ThreadPoolExecutor, ) -> None: captured_clients.append(client) @@ -451,6 +571,7 @@ def capture_client( configuration, command, "https://sourcegraph.example.com", + make_run_paths(), worker_pool, ) @@ -466,6 +587,7 @@ def capture_client( _config: cli.Config, _command: cli.ResolvedCommand, client: src.SourcegraphClient, + _run_paths: backups.RunPaths, _worker_pool: ThreadPoolExecutor, ) -> None: captured_clients.append(client) @@ -478,6 +600,7 @@ def capture_client( configuration, command, "https://sourcegraph.example.com", + make_run_paths(), worker_pool, ) @@ -518,30 +641,6 @@ def test_require_restore_input_file_reports_missing_snapshot_file(self) -> None: cli.require_restore_input_file(Path(directory) / "missing.json") self.assertIn("restore snapshot file does not exist", str(exit_context.exception)) - def test_config_with_default_paths_only_defaults_omitted_maps_path(self) -> None: - endpoint_directory = Path.cwd() / backups.ARTIFACTS_DIR_NAME / "sourcegraph.example.com" - - defaulted_set_config = cli.config_with_default_paths( - "set", - make_config(), - "https://sourcegraph.example.com", - ) - self.assertEqual(endpoint_directory / "maps.yaml", defaulted_set_config.maps_path) - - explicit_set_config = cli.config_with_default_paths( - "set", - make_config(maps_path=Path("maps.yaml")), - "https://sourcegraph.example.com", - ) - self.assertEqual(Path("maps.yaml"), explicit_set_config.maps_path) - - restore_config = cli.config_with_default_paths( - "restore", - make_config(restore_path=Path("snapshot.json")), - "https://sourcegraph.example.com", - ) - self.assertEqual(Path("snapshot.json"), restore_config.restore_path) - def test_run_fields_include_command_arguments_without_command_duplicates(self) -> None: configuration = make_config( maps_path=Path("maps.yaml"), @@ -550,7 +649,12 @@ def test_run_fields_include_command_arguments_without_command_duplicates(self) - ) command = cli.resolve_command("set", configuration) - fields = cli.run_fields(configuration, command, "https://sourcegraph.example.com") + fields = cli.run_fields( + configuration, + command, + "https://sourcegraph.example.com", + make_run_paths(), + ) self.assertEqual("users", fields["set_mode"]) self.assertEqual(True, fields["apply"]) @@ -565,7 +669,12 @@ def test_run_fields_omit_irrelevant_false_flags(self) -> None: configuration = make_config() command = cli.resolve_command("get", configuration) - fields = cli.run_fields(configuration, command, "https://sourcegraph.example.com") + fields = cli.run_fields( + configuration, + command, + "https://sourcegraph.example.com", + make_run_paths(), + ) self.assertNotIn("apply", fields) self.assertNotIn("no_backup", fields) @@ -577,7 +686,12 @@ def test_run_fields_include_no_backup_only_when_set(self) -> None: configuration = make_config(no_backup=True) command = cli.resolve_command("get", configuration) - fields = cli.run_fields(configuration, command, "https://sourcegraph.example.com") + fields = cli.run_fields( + configuration, + command, + "https://sourcegraph.example.com", + make_run_paths(), + ) self.assertEqual(True, fields["no_backup"]) @@ -592,6 +706,7 @@ def test_run_get_passes_no_backup_to_permission_command(self) -> None: auth_providers_by_config_id={}, saml_groups_attribute_name_by_config_id={}, ) + run_paths = make_run_paths() worker_pool = cast(ThreadPoolExecutor, object()) with ( @@ -604,7 +719,7 @@ def test_run_get_passes_no_backup_to_permission_command(self) -> None: return_value=cli.run_context.CommandData(), ) as cmd_get, ): - cli.run_get(configuration, client, sourcegraph_site_config, worker_pool) + cli.run_get(configuration, client, sourcegraph_site_config, run_paths, worker_pool) self.assertFalse(cmd_get.call_args.kwargs["do_backup"]) @@ -626,9 +741,7 @@ def test_cmd_get_no_backup_skips_snapshot_artifacts(self) -> None: ): permissions_command.cmd_get( client, - Path("code-hosts.yaml"), - Path("auth-providers.yaml"), - Path("maps.yaml"), + make_run_paths(), user_identifiers=(), users_without_explicit_perms=False, user_created_after=None, @@ -660,7 +773,7 @@ def test_cmd_set_dispatches_repo_filters_to_full_set(self) -> None: ) as cmd_set_full: permissions_command.cmd_set( client, - Path("maps.yaml"), + make_run_paths(), options, dry_run=True, parallelism=1, @@ -683,6 +796,7 @@ def test_run_command_passes_set_data_to_combined_sync(self) -> None: client = cast(src.SourcegraphClient, object()) sourcegraph_site_config = object() command_data = cli.run_context.CommandData() + run_paths = make_run_paths() with ( ThreadPoolExecutor(max_workers=1) as worker_pool, @@ -694,13 +808,14 @@ def test_run_command_passes_set_data_to_combined_sync(self) -> None: mock.patch.object(cli, "run_set", return_value=command_data) as run_set, mock.patch.object(cli, "run_sync_saml_organizations") as run_sync_saml_orgs, ): - cli.run_command(configuration, command, client, worker_pool) + cli.run_command(configuration, command, client, run_paths, worker_pool) run_set.assert_called_once_with( configuration, command, client, sourcegraph_site_config, + run_paths, worker_pool, ) run_sync_saml_orgs.assert_called_once_with( @@ -708,6 +823,7 @@ def test_run_command_passes_set_data_to_combined_sync(self) -> None: client, sourcegraph_site_config, command_data, + run_paths, worker_pool, ) @@ -717,32 +833,46 @@ def test_package_exports_programmatic_runner_and_config(self) -> None: self.assertIs(src_auth_perms_sync.Set, cli.Set) self.assertIs(src_auth_perms_sync.Restore, cli.Restore) self.assertIs(src_auth_perms_sync.SyncSamlOrgs, cli.SyncSamlOrgs) + self.assertIs(src_auth_perms_sync.GetResult, cli.GetResult) + self.assertIs(src_auth_perms_sync.CommandResult, cli.CommandResult) + self.assertIs(src_auth_perms_sync.RunPaths, backups.RunPaths) + self.assertIs(src_auth_perms_sync.EventSink, src.EventSink) + self.assertIs(src_auth_perms_sync.InMemoryEventSink, src.InMemoryEventSink) self.assertEqual( - ["Config", "Get", "Restore", "Set", "SyncSamlOrgs"], + [ + "CallbackEventSink", + "CommandResult", + "CompositeEventSink", + "Config", + "EventSink", + "Get", + "GetResult", + "InMemoryEventSink", + "JSONLEventSink", + "NullEventSink", + "Restore", + "RunPaths", + "Set", + "SyncSamlOrgs", + ], src_auth_perms_sync.__all__, ) def test_programmatic_runner_uses_supplied_config(self) -> None: - configuration = make_config(parallelism=1, sample_interval=0) + configuration = make_config(parallelism=1, sample_interval=0, no_files=True) captured: list[tuple[cli.Config, cli.ResolvedCommand, str]] = [] def capture_run( scoped_config: cli.Config, command: cli.ResolvedCommand, endpoint: str, + _run_paths: backups.RunPaths, _worker_pool: ThreadPoolExecutor, - ) -> None: + ) -> cli.run_context.CommandData: captured.append((scoped_config, command, endpoint)) + return cli.run_context.CommandData() - with ( - mock.patch.object(cli, "run_with_client", side_effect=capture_run), - mock.patch.object( - cli.src, - "logging_settings_from_config", - return_value=object(), - ), - mock.patch.object(cli.src, "logging", return_value=contextlib.nullcontext(None)), - ): + with mock.patch.object(cli, "run_with_client", side_effect=capture_run): self.assertTrue(src_auth_perms_sync.Get(configuration)) self.assertEqual(1, len(captured)) @@ -752,17 +882,9 @@ def capture_run( self.assertEqual("https://sourcegraph.example.com", endpoint) def test_programmatic_runner_returns_false_on_failure(self) -> None: - configuration = make_config(parallelism=1, sample_interval=0) + configuration = make_config(parallelism=1, sample_interval=0, no_files=True) - with ( - mock.patch.object(cli, "run_with_client", side_effect=SystemExit(1)), - mock.patch.object( - cli.src, - "logging_settings_from_config", - return_value=object(), - ), - mock.patch.object(cli.src, "logging", return_value=contextlib.nullcontext(None)), - ): + with mock.patch.object(cli, "run_with_client", side_effect=SystemExit(1)): self.assertFalse(src_auth_perms_sync.Get(configuration)) def assert_config_error( diff --git a/tests/unit/test_command_additive.py b/tests/unit/test_command_additive.py index ceac1cf..838b421 100644 --- a/tests/unit/test_command_additive.py +++ b/tests/unit/test_command_additive.py @@ -100,6 +100,19 @@ def _repositories_by_alias(self, variables: src.JSONDict) -> dict[str, object]: return response +def make_run_paths(directory: Path, maps_path: Path) -> backups.RunPaths: + endpoint_directory = directory / "artifacts" / "sourcegraph.example.com" + return backups.RunPaths( + timestamp="2026-06-09-10-00-00", + artifacts_dir=directory / "artifacts", + endpoint_directory=endpoint_directory, + maps_path=maps_path, + code_hosts_path=endpoint_directory / "code-hosts.yaml", + auth_providers_path=endpoint_directory / "auth-providers.yaml", + run_directory=directory / "run-artifacts", + ) + + class AdditiveCommandTests(unittest.TestCase): def test_no_backup_dry_run_skips_artifacts_and_repo_load_when_no_rule_matches( self, @@ -127,22 +140,21 @@ def test_no_backup_dry_run_skips_artifacts_and_repo_load_when_no_rule_matches( """.lstrip(), encoding="utf-8", ) - run_directory = directory / "run-artifacts" - - with backups.run_artifacts_context(run_directory, "2026-06-09-10-00-00"): - command.cmd_set_additive_users( - cast(src.SourcegraphClient, client), - maps_path, - ("marc",), - None, - dry_run=True, - parallelism=1, - bind_id_mode="USERNAME", - saml_groups_attribute_name_by_config_id={}, - do_backup=False, - ) - - self.assertFalse(run_directory.exists()) + run_paths = make_run_paths(directory, maps_path) + + command.cmd_set_additive_users( + cast(src.SourcegraphClient, client), + run_paths, + ("marc",), + None, + dry_run=True, + parallelism=1, + bind_id_mode="USERNAME", + saml_groups_attribute_name_by_config_id={}, + do_backup=False, + ) + + self.assertFalse(run_paths.run_directory.exists()) self.assertEqual([], client.repo_service_ids) self.assertEqual(0, client.explicit_repo_fetch_count) @@ -159,7 +171,8 @@ def test_additive_users_loads_only_referenced_code_hosts(self) -> None: ) with tempfile.TemporaryDirectory() as directory_name: - maps_path = Path(directory_name) / "maps.yaml" + directory = Path(directory_name) + maps_path = directory / "maps.yaml" maps_path.write_text( """ maps: @@ -176,7 +189,7 @@ def test_additive_users_loads_only_referenced_code_hosts(self) -> None: command.cmd_set_additive_users( cast(src.SourcegraphClient, client), - maps_path, + make_run_paths(directory, maps_path), ("alice",), None, dry_run=True, @@ -213,21 +226,22 @@ def test_backup_dry_run_reuses_planning_explicit_repo_read_for_snapshot(self) -> """.lstrip(), encoding="utf-8", ) - run_directory = directory / "run-artifacts" - - with backups.run_artifacts_context(run_directory, "2026-06-09-10-00-00"): - command.cmd_set_additive_users( - cast(src.SourcegraphClient, client), - maps_path, - ("alice",), - None, - dry_run=True, - parallelism=1, - bind_id_mode="USERNAME", - saml_groups_attribute_name_by_config_id={}, - do_backup=True, - ) + run_paths = make_run_paths(directory, maps_path) + run_paths.run_directory.mkdir(parents=True) + + command.cmd_set_additive_users( + cast(src.SourcegraphClient, client), + run_paths, + ("alice",), + None, + dry_run=True, + parallelism=1, + bind_id_mode="USERNAME", + saml_groups_attribute_name_by_config_id={}, + do_backup=True, + ) + run_directory = run_paths.run_directory self.assertTrue((run_directory / "before.json").exists()) self.assertTrue((run_directory / "after.json").exists()) self.assertTrue((run_directory / "diff.json").exists()) diff --git a/tests/unit/test_command_files.py b/tests/unit/test_command_files.py new file mode 100644 index 0000000..44e278c --- /dev/null +++ b/tests/unit/test_command_files.py @@ -0,0 +1,241 @@ +"""File-writing behavior of command workflows under write_files True/False. + +These guard the --no-files contract: with write_files=False a run must leave +no artifacts on disk while still returning discovery data in memory, and with +write_files=True the on-disk YAML must match the in-memory views exactly. +""" + +from __future__ import annotations + +import json +import tempfile +import unittest +from pathlib import Path +from types import SimpleNamespace +from typing import cast +from unittest import mock + +import src_py_lib as src +import yaml + +from src_auth_perms_sync.permissions import command as permissions_command +from src_auth_perms_sync.permissions import snapshot as permission_snapshot +from src_auth_perms_sync.permissions import types as permission_types +from src_auth_perms_sync.permissions import workflow as permission_workflow +from src_auth_perms_sync.shared import backups +from src_auth_perms_sync.shared import types as shared_types + + +def make_run_paths(directory: Path, *, write_files: bool) -> backups.RunPaths: + endpoint_directory = directory / "artifacts" / "sourcegraph.example.com" + return backups.RunPaths( + timestamp="2026-06-12-00-00-00", + artifacts_dir=directory / "artifacts", + endpoint_directory=endpoint_directory, + maps_path=directory / "maps.yaml", + code_hosts_path=endpoint_directory / "code-hosts.yaml", + auth_providers_path=endpoint_directory / "auth-providers.yaml", + run_directory=endpoint_directory / "runs" / "2026-06-12-00-00-00-get", + write_files=write_files, + ) + + +def make_auth_provider() -> shared_types.AuthProvider: + return { + "serviceType": "github", + "serviceID": "https://github.example.com", + "clientID": "client-1", + "displayName": "GitHub Enterprise SSO", + "isBuiltin": False, + "configID": "github-sso", + } + + +def make_external_service() -> permission_types.ExternalService: + return { + "id": "RXh0ZXJuYWxTZXJ2aWNlOjE=", + "kind": "GITHUB", + "displayName": "GitHub Enterprise", + "url": "https://github.example.com", + "repoCount": 1, + "createdAt": "2026-06-09T00:00:00Z", + "updatedAt": "2026-06-09T00:00:00Z", + "lastSyncAt": None, + "nextSyncAt": None, + "lastSyncError": None, + "warning": None, + "unrestricted": False, + "suspended": False, + "hasConnectionCheck": False, + "supportsRepoExclusion": False, + "creator": None, + "lastUpdater": None, + "config": "{}", + } + + +def make_snapshot() -> permission_snapshot.Snapshot: + return { + "schema_version": permission_snapshot.SNAPSHOT_SCHEMA_VERSION, + "captured_at": "2026-06-12T00:00:00+00:00", + "endpoint": "https://sourcegraph.example.com", + "bindID_mode": "USERNAME", + "config_file": None, + "config_sha256": None, + "pending_users": {}, + "stats": { + "total_users_scanned": 1, + "users_with_explicit_grants": 1, + "repos_with_explicit_grants": 1, + "total_grants": 1, + }, + "repos": { + src.encode_repository_id(1): { + "name": "github.com/sourcegraph/example", + "users": ["alice"], + } + }, + } + + +def run_cmd_get( + run_paths: backups.RunPaths, + *, + do_backup: bool, +) -> permissions_command.run_context.CommandData: + client = cast( + src.SourcegraphClient, + SimpleNamespace(endpoint="https://sourcegraph.example.com"), + ) + with ( + mock.patch.object( + permissions_command, + "load_discovery", + return_value=([make_auth_provider()], [make_external_service()], {}), + ), + mock.patch.object(permissions_command, "_load_get_users", return_value=[]), + mock.patch.object( + permissions_command.permission_snapshot, + "build_snapshot", + return_value=make_snapshot(), + ), + ): + return permissions_command.cmd_get( + client, + run_paths, + user_identifiers=(), + users_without_explicit_perms=False, + user_created_after=None, + repository_names=(), + repositories_without_explicit_perms=False, + repository_created_after=None, + parallelism=1, + explicit_permissions_batch_size=25, + bind_id_mode="USERNAME", + saml_groups_attribute_name_by_config_id={}, + auth_providers_by_config_id={}, + do_backup=do_backup, + ) + + +class CmdGetFileBehaviorTests(unittest.TestCase): + def test_no_files_run_creates_nothing_but_returns_views(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + maps_path = directory / "maps.yaml" + maps_path.write_text("maps: []\n") + run_paths = make_run_paths(directory, write_files=False) + + command_data = run_cmd_get(run_paths, do_backup=True) + + # Only the pre-existing maps file remains; no YAML, snapshots, + # maps backups, or run directories appeared anywhere. + self.assertEqual({maps_path}, set(directory.rglob("*"))) + + self.assertIsNotNone(command_data.auth_provider_views) + self.assertIsNotNone(command_data.code_host_views) + assert command_data.auth_provider_views is not None + assert command_data.code_host_views is not None + self.assertEqual("github", command_data.auth_provider_views[0]["type"]) + self.assertEqual("GitHub Enterprise", command_data.code_host_views[0]["displayName"]) + + def test_writing_run_dumps_yaml_matching_the_returned_views(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + run_paths = make_run_paths(directory, write_files=True) + + command_data = run_cmd_get(run_paths, do_backup=False) + + self.assertTrue(run_paths.code_hosts_path.is_file()) + self.assertTrue(run_paths.auth_providers_path.is_file()) + code_hosts_on_disk = yaml.safe_load(run_paths.code_hosts_path.read_text()) + auth_providers_on_disk = yaml.safe_load(run_paths.auth_providers_path.read_text()) + + self.assertEqual(command_data.code_host_views, code_hosts_on_disk["codeHostConnections"]) + self.assertEqual(command_data.auth_provider_views, auth_providers_on_disk["authProviders"]) + + +class WriteMapsBackupTests(unittest.TestCase): + def test_copies_the_input_file_into_the_run_directory_with_its_name(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + maps_path = directory / "team-maps.yaml" + maps_path.write_text("maps: []\n") + run_paths = make_run_paths(directory, write_files=True) + run_paths.run_directory.mkdir(parents=True) + + backup_path = permission_workflow.write_maps_backup(maps_path, run_paths) + + self.assertEqual(run_paths.run_directory / "team-maps.yaml", backup_path) + assert backup_path is not None + self.assertEqual("maps: []\n", backup_path.read_text()) + + def test_returns_none_without_writing_when_the_input_is_missing(self) -> None: + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + run_paths = make_run_paths(directory, write_files=True) + run_paths.run_directory.mkdir(parents=True) + + backup_path = permission_workflow.write_maps_backup( + directory / "missing-maps.yaml", + run_paths, + ) + + self.assertIsNone(backup_path) + self.assertEqual([], list(run_paths.run_directory.iterdir())) + + +class WriteSnapshotPairTests(unittest.TestCase): + def test_writes_before_after_and_diff_at_artifact_paths(self) -> None: + before = make_snapshot() + after = make_snapshot() + after["repos"][src.encode_repository_id(1)] = { + "name": "github.com/sourcegraph/example", + "users": ["alice", "bob"], + } + + with tempfile.TemporaryDirectory() as directory_name: + directory = Path(directory_name) + run_paths = make_run_paths(directory, write_files=True) + run_paths.run_directory.mkdir(parents=True) + + before_path, after_path, diff_path = permission_workflow.write_snapshot_pair( + run_paths, + before, + after, + ) + + self.assertEqual(run_paths.artifact_path("before"), before_path) + self.assertEqual(run_paths.artifact_path("after"), after_path) + self.assertEqual(run_paths.artifact_path("diff"), diff_path) + before_on_disk = json.loads(before_path.read_text()) + after_on_disk = json.loads(after_path.read_text()) + diff_on_disk = json.loads(diff_path.read_text()) + + self.assertEqual(["alice"], before_on_disk["repos"]["1"]["users"]) + self.assertEqual(["alice", "bob"], after_on_disk["repos"]["1"]["users"]) + self.assertEqual(1, diff_on_disk["summary"]["grants_added"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_module_api.py b/tests/unit/test_module_api.py new file mode 100644 index 0000000..7b380ff --- /dev/null +++ b/tests/unit/test_module_api.py @@ -0,0 +1,119 @@ +"""Module-mode (importable API) guarantees for host applications. + +Customers embed `src_auth_perms_sync.Get/Set/...` inside their own services: +failures must surface as falsy result objects (never exceptions or process +exits), the host's stdlib logging configuration must stay untouched, and +structured events must reach a caller-supplied sink. +""" + +from __future__ import annotations + +import contextlib +import io +import logging +import os +import tempfile +import unittest +from pathlib import Path +from typing import Any, cast + +import src_py_lib as src + +import src_auth_perms_sync +from src_auth_perms_sync import cli + + +def make_config(**updates: object) -> cli.Config: + base_config = cli.Config( + src_endpoint="https://invalid.invalid", + src_access_token="dummy", + max_attempts=1, + http_timeout_seconds=1.0, + parallelism=1, + sample_interval=0.0, + ) + return base_config.model_copy(update=updates) + + +class ModuleApiTests(unittest.TestCase): + """Every test runs in a throwaway cwd: module runs default their + artifacts directory to `/src-auth-perms-sync-runs`.""" + + def setUp(self) -> None: + self._temporary_directory = tempfile.TemporaryDirectory() + self.working_directory = Path(self._temporary_directory.name) + self._previous_working_directory = Path.cwd() + os.chdir(self.working_directory) + # Silence the lastResort stderr printout for the expected + # "run failed" log without touching the root logger. + self._null_handler = logging.NullHandler() + logging.getLogger("src_auth_perms_sync").addHandler(self._null_handler) + + def tearDown(self) -> None: + logging.getLogger("src_auth_perms_sync").removeHandler(self._null_handler) + os.chdir(self._previous_working_directory) + self._temporary_directory.cleanup() + + def test_get_with_unreachable_endpoint_returns_falsy_without_raising(self) -> None: + root_logger = logging.getLogger() + handlers_before = list(root_logger.handlers) + level_before = root_logger.level + + result = src_auth_perms_sync.Get(make_config()) + + self.assertIsInstance(result, cli.GetResult) + self.assertFalse(result) + self.assertFalse(result.succeeded) + self.assertEqual(handlers_before, list(root_logger.handlers)) + self.assertEqual(level_before, root_logger.level) + + def test_get_result_and_command_result_truthiness_mirror_succeeded(self) -> None: + self.assertTrue(bool(cli.GetResult(succeeded=True))) + self.assertFalse(bool(cli.GetResult(succeeded=False))) + self.assertTrue(bool(cli.CommandResult(succeeded=True))) + self.assertFalse(bool(cli.CommandResult(succeeded=False))) + + def test_set_with_no_files_apply_without_no_backup_returns_falsy(self) -> None: + config = make_config(no_files=True, apply=True, no_backup=False, full=True) + + with contextlib.redirect_stderr(io.StringIO()) as captured_stderr: + result = src_auth_perms_sync.Set(config) + + self.assertIsInstance(result, cli.CommandResult) + self.assertFalse(result) + self.assertIn( + "--no-files with --apply also requires --no-backup", + captured_stderr.getvalue(), + ) + # Validation fails before any path resolution, so nothing is created. + self.assertEqual([], list(self.working_directory.iterdir())) + + def test_event_sink_receives_run_start_and_error_end_events(self) -> None: + sink = src.InMemoryEventSink() + config = make_config(no_files=True) + + result = src_auth_perms_sync.Get(config, event_sink=sink) + + self.assertFalse(result) + # --no-files: the failed run still left the host filesystem untouched. + self.assertEqual([], list(self.working_directory.iterdir())) + + run_events = [event for event in sink.events if event.get("event_name") == "run"] + phases = [self.event_attributes(event).get("phase") for event in run_events] + self.assertIn("start", phases) + self.assertIn("end", phases) + run_end_attributes = self.event_attributes( + next( + event for event in run_events if self.event_attributes(event).get("phase") == "end" + ) + ) + self.assertEqual("error", run_end_attributes.get("status")) + + def event_attributes(self, event: dict[str, Any]) -> dict[str, Any]: + attributes = event.get("attributes") + self.assertIsInstance(attributes, dict) + return cast("dict[str, Any]", attributes) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index a3c3b50..d1ce3ae 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -448,17 +448,22 @@ def test_write_projected_snapshot_keeps_after_repos_out_of_memory(self) -> None: expected_users, repo_names, ) - with backups.run_artifacts_context(directory, "test-run"): - diff_path = permission_workflow.write_projected_snapshot_diff_file( - directory / "maps.yaml", - "test-run", - before["endpoint"], - "set-dry-run", - before, - after, - expected_users, - repo_names, - ) + run_paths = backups.RunPaths( + timestamp="test-run", + artifacts_dir=directory, + endpoint_directory=directory, + maps_path=directory / "maps.yaml", + code_hosts_path=directory / "code-hosts.yaml", + auth_providers_path=directory / "auth-providers.yaml", + run_directory=directory, + ) + diff_path = permission_workflow.write_projected_snapshot_diff_file( + run_paths, + before, + after, + expected_users, + repo_names, + ) after_on_disk = json.loads(after_path.read_text()) diff_on_disk = json.loads(diff_path.read_text()) diff --git a/uv.lock b/uv.lock index 6a749c0..213fb38 100644 --- a/uv.lock +++ b/uv.lock @@ -548,7 +548,7 @@ dev = [ requires-dist = [ { name = "json5", specifier = ">=0.14.0" }, { name = "pyyaml", specifier = ">=6.0.3" }, - { name = "src-py-lib", extras = ["otel"], specifier = "==0.2.1" }, + { name = "src-py-lib", extras = ["otel"], specifier = "==0.3.0" }, ] [package.metadata.requires-dev] @@ -560,7 +560,7 @@ dev = [ [[package]] name = "src-py-lib" -version = "0.2.1" +version = "0.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -568,9 +568,9 @@ dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/31/62/6bfbc05c31522ebabe054175505d833516ecb5a52e46776fabb37629f45e/src_py_lib-0.2.1.tar.gz", hash = "sha256:ad1b4ea92245cc9b144f48dd8a985003309fd0ee028ff2d25e55f73647abdd78", size = 88498, upload-time = "2026-06-09T06:26:57.789Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/5565c6bde9a214d6fc6100cd0a77bcd11facbdbbe5509d6f073a43747dbd/src_py_lib-0.3.0.tar.gz", hash = "sha256:683699e9d25b9c0a515c861e90d3613545d3625dd51ef3bb45f5eee5de29921a", size = 96387, upload-time = "2026-06-12T09:49:14.754Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/b8/ab4433e688deeb321aa992c8098681f2eefe3b705d0f52ebd2186c823596/src_py_lib-0.2.1-py3-none-any.whl", hash = "sha256:d1f736717b4d559974ee61f89c0dff360d33064850d27ec07bfbd41495baa198", size = 49749, upload-time = "2026-06-09T06:26:56.476Z" }, + { url = "https://files.pythonhosted.org/packages/9d/82/60c11be1a31e54bff2653803c94600b181963490eacc8392e64eb8d7bdfc/src_py_lib-0.3.0-py3-none-any.whl", hash = "sha256:0508960fddf49211eecc0e01018f4074fba8fbabe87a0cd0909d645b581bc25e", size = 54388, upload-time = "2026-06-12T09:49:13.617Z" }, ] [package.optional-dependencies]