From 7191f1b54618cbaa09af7e8f96a997cf6e95659b Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Wed, 10 Jun 2026 02:22:39 -0600 Subject: [PATCH 1/9] update tests --- .gitignore | 1 + AGENTS.md | 41 +- dev/hooks/pre-commit | 6 +- dev/memory-efficiency.md | 40 + dev/test-fixture-cases.py | 69 - src/src_auth_perms_sync/cli.py | 4 +- src/src_auth_perms_sync/permissions/apply.py | 34 +- .../permissions/snapshot.py | 83 +- tests/README.md | 82 + tests/__init__.py | 11 + tests/e2e/__init__.py | 10 + ...ission_fixture_cases.py => case_runner.py} | 308 +- .../add-users-preserves-existing/after.json | 41 +- .../add-users-preserves-existing/before.json | 39 +- .../add-users-preserves-existing/case.json | 7 - .../add-users-preserves-existing/maps.yaml | 8 +- .../fixtures/and-filters-intersect/after.json | 103 + .../and-filters-intersect/before.json | 101 + .../fixtures/and-filters-intersect/maps.yaml | 11 + .../after.json | 75 + .../before.json | 75 + .../maps.yaml | 9 + .../fixtures/full-overwrite-unions/after.json | 46 +- .../full-overwrite-unions/before.json | 45 +- .../fixtures/full-overwrite-unions/case.json | 7 - .../fixtures/full-overwrite-unions/maps.yaml | 14 +- .../fixtures/invalid-bad-regex/before.json | 48 + .../e2e/fixtures/invalid-bad-regex/maps.yaml | 8 + .../invalid-missing-repos-section/before.json | 48 + .../invalid-missing-repos-section/maps.yaml | 5 + .../before.json | 46 + .../invalid-set-created-after-date/maps.yaml | 8 + .../before.json | 46 + .../maps.yaml | 8 + .../before.json | 48 + .../invalid-unknown-selector-field/maps.yaml | 8 + tests/e2e/fixtures/no-match-noop/before.json | 48 + tests/e2e/fixtures/no-match-noop/maps.yaml | 8 + .../fixtures/regex-filters-scope/after.json | 97 + .../fixtures/regex-filters-scope/before.json | 91 + .../fixtures/regex-filters-scope/maps.yaml | 8 + .../fixtures/restore-missing-file/before.json | 49 + .../e2e/fixtures/saml-group-filter/after.json | 140 + 
.../fixtures/saml-group-filter/before.json | 136 + .../e2e/fixtures/saml-group-filter/maps.yaml | 9 + .../set-repos-created-after-noop/before.json | 49 + .../set-repos-created-after-noop/maps.yaml | 8 + .../set-repos-created-after/after.json | 70 + .../set-repos-created-after/before.json | 67 + .../set-repos-created-after/maps.yaml | 10 + .../e2e/fixtures/set-repos-filter/after.json | 72 + .../e2e/fixtures/set-repos-filter/before.json | 71 + tests/e2e/fixtures/set-repos-filter/maps.yaml | 10 + .../after.json | 72 + .../before.json | 69 + .../maps.yaml | 10 + .../set-users-created-after-noop/before.json | 49 + .../set-users-created-after-noop/maps.yaml | 8 + .../set-users-created-after/after.json | 85 + .../set-users-created-after/before.json | 80 + .../set-users-created-after/maps.yaml | 9 + .../maps.yaml | 8 + .../after.json | 72 + .../before.json | 69 + .../maps.yaml | 10 + tests/e2e/test_local_cases.py | 89 + tests/integration/__init__.py | 10 + tests/run.py | 2872 +++++++++++++++++ tests/tests.yaml | 670 ++++ tests/unit/__init__.py | 10 + tests/unit/test_cli_config.py | 2 +- tests/unit/test_snapshot.py | 101 + 72 files changed, 6530 insertions(+), 251 deletions(-) delete mode 100644 dev/test-fixture-cases.py create mode 100644 tests/README.md rename tests/e2e/{test_permission_fixture_cases.py => case_runner.py} (64%) delete mode 100644 tests/e2e/fixtures/add-users-preserves-existing/case.json create mode 100644 tests/e2e/fixtures/and-filters-intersect/after.json create mode 100644 tests/e2e/fixtures/and-filters-intersect/before.json create mode 100644 tests/e2e/fixtures/and-filters-intersect/maps.yaml create mode 100644 tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json create mode 100644 tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json create mode 100644 tests/e2e/fixtures/full-overwrite-removes-stale-grant/maps.yaml delete mode 100644 tests/e2e/fixtures/full-overwrite-unions/case.json create mode 100644 
tests/e2e/fixtures/invalid-bad-regex/before.json create mode 100644 tests/e2e/fixtures/invalid-bad-regex/maps.yaml create mode 100644 tests/e2e/fixtures/invalid-missing-repos-section/before.json create mode 100644 tests/e2e/fixtures/invalid-missing-repos-section/maps.yaml create mode 100644 tests/e2e/fixtures/invalid-set-created-after-date/before.json create mode 100644 tests/e2e/fixtures/invalid-set-created-after-date/maps.yaml create mode 100644 tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json create mode 100644 tests/e2e/fixtures/invalid-set-repos-created-after-date/maps.yaml create mode 100644 tests/e2e/fixtures/invalid-unknown-selector-field/before.json create mode 100644 tests/e2e/fixtures/invalid-unknown-selector-field/maps.yaml create mode 100644 tests/e2e/fixtures/no-match-noop/before.json create mode 100644 tests/e2e/fixtures/no-match-noop/maps.yaml create mode 100644 tests/e2e/fixtures/regex-filters-scope/after.json create mode 100644 tests/e2e/fixtures/regex-filters-scope/before.json create mode 100644 tests/e2e/fixtures/regex-filters-scope/maps.yaml create mode 100644 tests/e2e/fixtures/restore-missing-file/before.json create mode 100644 tests/e2e/fixtures/saml-group-filter/after.json create mode 100644 tests/e2e/fixtures/saml-group-filter/before.json create mode 100644 tests/e2e/fixtures/saml-group-filter/maps.yaml create mode 100644 tests/e2e/fixtures/set-repos-created-after-noop/before.json create mode 100644 tests/e2e/fixtures/set-repos-created-after-noop/maps.yaml create mode 100644 tests/e2e/fixtures/set-repos-created-after/after.json create mode 100644 tests/e2e/fixtures/set-repos-created-after/before.json create mode 100644 tests/e2e/fixtures/set-repos-created-after/maps.yaml create mode 100644 tests/e2e/fixtures/set-repos-filter/after.json create mode 100644 tests/e2e/fixtures/set-repos-filter/before.json create mode 100644 tests/e2e/fixtures/set-repos-filter/maps.yaml create mode 100644 
tests/e2e/fixtures/set-repos-without-explicit-perms/after.json create mode 100644 tests/e2e/fixtures/set-repos-without-explicit-perms/before.json create mode 100644 tests/e2e/fixtures/set-repos-without-explicit-perms/maps.yaml create mode 100644 tests/e2e/fixtures/set-users-created-after-noop/before.json create mode 100644 tests/e2e/fixtures/set-users-created-after-noop/maps.yaml create mode 100644 tests/e2e/fixtures/set-users-created-after/after.json create mode 100644 tests/e2e/fixtures/set-users-created-after/before.json create mode 100644 tests/e2e/fixtures/set-users-created-after/maps.yaml create mode 100644 tests/e2e/fixtures/set-users-sync-saml-orgs-dry-run/maps.yaml create mode 100644 tests/e2e/fixtures/set-users-without-explicit-perms/after.json create mode 100644 tests/e2e/fixtures/set-users-without-explicit-perms/before.json create mode 100644 tests/e2e/fixtures/set-users-without-explicit-perms/maps.yaml create mode 100644 tests/e2e/test_local_cases.py create mode 100644 tests/run.py create mode 100644 tests/tests.yaml diff --git a/.gitignore b/.gitignore index 3fe70d5..3518727 100644 --- a/.gitignore +++ b/.gitignore @@ -25,4 +25,5 @@ wheels/ !.env.example !.markdownlint-cli2.yaml !maps-example.yaml +!tests/tests.yaml !tests/e2e/fixtures/**/maps.yaml diff --git a/AGENTS.md b/AGENTS.md index 05440f8..406febb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,21 +26,40 @@ uv run src-auth-perms-sync --help ## Testing -- First run a dry-run (default behaviour, without `--apply` flag) against a Sourcegraph instance +All testing runs through one entrypoint: `tests/run.py`. Output goes to the +console and to a per-run log file under `logs/`. Each level runs only its +own checks. 
```bash -uv run src-auth-perms-sync [--get] -uv run src-auth-perms-sync --set maps.yaml --full -uv run src-auth-perms-sync --restore backups///before.json +# Fast, no network (also what the pre-commit hook runs): +# lint, format, pyright, unit + fixture tests, CLI rejection matrix, +# randomized permission invariants +uv run tests/run.py + +# End-to-end runs against the .env test instance with independent GraphQL +# read-back verification, and a wheel install smoke test +uv run tests/run.py --live + +# Run a subset: comma-delimited test names, substring match +uv run tests/run.py --live full-overwrite-unions +uv run tests/run.py --live wheel,baseline + +# Repeated timed runs with Jaeger trace retention, RSS sampling, +# optional kubectl load monitoring, and baseline comparison +uv run tests/run.py --performance --repeat 3 +uv run tests/run.py --performance --baseline-command "uvx src-auth-perms-sync@latest" \ + --fail-on-memory-regression-percent 10 + +# Regenerate fixture goldens after editing tests/e2e/fixtures/ cases +uv run tests/run.py --update-golden ``` -- Read the output, and evaluate the expected changes -- If the expected changes look correct - - Run with the `--apply` flag against the test instance - - Read and evaluate the output for expected changes - - Run with the `--restore` flag against the test instance - - Always inspect the before / after snapshots in - `src-auth-perms-sync-runs//backups/` afterward to confirm the diff matches what you expected +- Fixture cases live in `tests/e2e/fixtures/<case-name>/` — see the README there + for the format. Add cases there to cover new mapping behaviors. +- For manual verification against a real instance, dry-run first (no + `--apply`), read the planned changes, then `--apply` on a scratch instance + and inspect the before/after snapshots under + `src-auth-perms-sync-runs//runs/`. 
## Release process diff --git a/dev/hooks/pre-commit b/dev/hooks/pre-commit index 0518138..a577809 100755 --- a/dev/hooks/pre-commit +++ b/dev/hooks/pre-commit @@ -19,10 +19,6 @@ run git diff --cached --check run git diff --cached --stat run git diff --stat -run uv run ruff check . -run uv run ruff format . --check -run uv run pyright -run uv run python -m unittest discover -s tests -run uv run src-auth-perms-sync --help +run uv run tests/run.py --local printf '\nPre-commit quality checks passed.\n' diff --git a/dev/memory-efficiency.md b/dev/memory-efficiency.md index dbee6be..8c38ec9 100644 --- a/dev/memory-efficiency.md +++ b/dev/memory-efficiency.md @@ -226,6 +226,46 @@ slower under the large explicit-perms state. This reinforces that the CLI needs better Sourcegraph bulk read and write APIs for very large explicit permission sets. +## Concurrent-operator evidence (2026-06-10) + +Four `src-auth-perms-sync` processes ran full explicit-permissions captures +concurrently against the 10k-user / 50k-repo test instance (each at +`--parallelism 8`, `--explicit-permissions-batch-size 25`), while a fifth ran +a small `set` command. Instance: single `pgsql-0` on an 8-core node. + +Observed during the concurrent captures: + +- `pgsql-0` CPU (`kubectl top`): 7,636–7,683 millicores of 8,000 (saturated). +- `frontend` / `gitserver` CPU: 124–138m / 2–3m (idle bystanders). +- `pg_stat_activity`: 29 active statements, all + `permsStore.ListUserPermissions`, **zero wait events** — pure CPU, no lock + contention. +- `pg_stat_statements`: `permsStore.ListUserPermissions` at 24,026 calls, + 27,635.6s total, 1,150ms mean. +- Per-client capture throughput: 23 users/sec solo → 2–4 users/sec at 4-way + concurrency. +- Aggregate throughput: 8–16 users/sec at 4-way — **below the 23 users/sec a + single client achieves alone** (negative scaling). +- ALB (CloudWatch): no 5xx, no rejected connections — the edge and frontend + are not the bottleneck. 
+- Collateral failure: the fifth client's queries exceeded the 60s read + timeout under this load; 5 retry attempts exhausted; its run failed with + exit 1. + +Implications for the engineering request: + +- A single per-user `permissionsInfo.repositories(source: API)` read costs + roughly 0.3–0.4s of Postgres CPU at this state size (1,150ms mean execution + under contention), so one operator at modest parallelism can saturate the + database by itself, and two concurrent operators degrade each other below + single-operator throughput. +- Timeout/retry behavior amplifies the problem: once statements exceed the + client read timeout, retries re-run the same expensive queries, adding load + exactly when the database is saturated. +- A bulk read API (one query returning explicit grants for many users or for + whole repos) would replace ~10,000 × ~1s statements per capture with a + single scan, and would also make concurrent operators viable. + ## Sourcegraph engineering request `src-auth-perms-sync` needs to snapshot explicit API permissions for many diff --git a/dev/test-fixture-cases.py b/dev/test-fixture-cases.py deleted file mode 100644 index 106c6f2..0000000 --- a/dev/test-fixture-cases.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import annotations - -import sys -from pathlib import Path -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from tests.e2e.test_permission_fixture_cases import FixtureRunResult - -ROOT = Path(__file__).resolve().parents[1] -if str(ROOT) not in sys.path: - sys.path.insert(0, str(ROOT)) - - -def _format_delta(before: int, after: int) -> str: - return f"{after - before:+d}" - - -def _format_expected(value: int | None) -> str: - if value is None: - return "n/a" - return str(value) - - -def _print_result(result: FixtureRunResult) -> None: - status = "PASS" if result.passed else "FAIL" - permission_pair_delta = _format_delta( - result.before_counts.permission_pairs, - result.actual_counts.permission_pairs, - ) - print(f"{status} 
{result.name} — {result.description}") - print(f" scope: users={result.before_counts.users} repos={result.before_counts.repos}") - print( - " permission pairs: " - f"before={result.before_counts.permission_pairs} " - f"expected={result.expected_counts.permission_pairs} " - f"actual={result.actual_counts.permission_pairs} " - f"delta={permission_pair_delta}" - ) - print( - " changed repos: " - f"expected={result.expected_changed_repos} " - f"actual={result.actual_changed_repos}" - ) - print( - " mutations: " - f"expected={_format_expected(result.expected_mutations)} " - f"actual={result.actual_mutations}" - ) - if result.failure is not None: - print(f" failure: {result.failure}") - print() - - -def main() -> int: - from tests.e2e.test_permission_fixture_cases import fixture_case_dirs, run_fixture_case - - results = [run_fixture_case(case_dir) for case_dir in fixture_case_dirs()] - for result in results: - _print_result(result) - - passed = sum(1 for result in results if result.passed) - failed = len(results) - passed - print(f"Summary: {passed} passed, {failed} failed, {len(results)} total.") - return 0 if failed == 0 else 1 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/src/src_auth_perms_sync/cli.py b/src/src_auth_perms_sync/cli.py index 4308303..a59ef4c 100644 --- a/src/src_auth_perms_sync/cli.py +++ b/src/src_auth_perms_sync/cli.py @@ -326,12 +326,12 @@ class Config(src.SourcegraphClientConfig, src.LoggingConfig, src.OpenTelemetryCo help_group="Performance", ) http_timeout_seconds: float = src.config_field( - default=60.0, + default=300.0, env_var="SRC_AUTH_PERMS_SYNC_HTTP_TIMEOUT_SECONDS", cli_flag="--http-timeout-seconds", metavar="SECONDS", gt=0, - help="HTTP read timeout per request in seconds (default: 60)", + help="HTTP read timeout per request in seconds (default: 300)", help_group="Performance", ) sample_interval: float = src.config_field( diff --git a/src/src_auth_perms_sync/permissions/apply.py 
b/src/src_auth_perms_sync/permissions/apply.py index 2951d61..9992d41 100644 --- a/src/src_auth_perms_sync/permissions/apply.py +++ b/src/src_auth_perms_sync/permissions/apply.py @@ -28,25 +28,28 @@ @dataclass class CircuitBreaker: - """Sliding-window circuit breaker for the apply phase. + """Sliding-window circuit breaker for bulk GraphQL phases. - Tracks the most recent `window_size` mutation outcomes (success or + Tracks the most recent `window_size` request outcomes (success or failure). Once `failure_rate` over that window exceeds `failure_threshold` AND we have at least `min_samples` outcomes recorded, the breaker opens and `is_open()` returns True for the rest of the run (no half-open / reset logic — once we decide the backend is too unhealthy, we stay tripped). - Designed to bail out of a hopeless run (e.g., backend down or - severely rate-limiting) instead of grinding through every remaining - mutation, retrying each request repeatedly, and burning hours of - wall-clock in retries while making things worse for the server. - - Used by the apply helpers: each completed mutation calls - `breaker.record(success=...)`, then `is_open()` is checked between - completions; once open, the remaining queued futures are cancelled - and the loop exits, leaving the operator a clear ERROR log + a - non-zero exit code. + Designed to bail out of a hopeless run (e.g., backend down, severely + rate-limiting, or saturated to the point of read timeouts) instead of + grinding through every remaining request, retrying each one + repeatedly, and burning hours of wall-clock in retries while making + things worse for the server. + + Used by the apply helpers and the snapshot capture functions: each + completed request calls `breaker.record(success=...)`, then + `is_open()` is checked between completions; once open, the remaining + queued futures are cancelled and the loop exits. 
Apply phases finish + with the after-snapshot + validation and exit 1; capture phases raise + immediately, because a snapshot with silently-missing grants must + never be written (it could later drive an incorrect restore). """ window_size: int = 50 @@ -86,10 +89,9 @@ def record(self, success: bool) -> None: ) log.error( "Circuit breaker OPEN: %d/%d (%.0f%%) of last %d " - "mutations failed; halting apply to avoid hammering " - "a struggling instance. Remaining work will be " - "cancelled; the run will continue with the after-" - "snapshot+validation, then exit 1.", + "requests failed; halting this phase to avoid " + "hammering a struggling instance. Remaining work " + "will be cancelled and the run will fail.", failures, len(self._outcomes), 100 * rate, diff --git a/src/src_auth_perms_sync/permissions/snapshot.py b/src/src_auth_perms_sync/permissions/snapshot.py index 3b22b09..a22f5c6 100644 --- a/src/src_auth_perms_sync/permissions/snapshot.py +++ b/src/src_auth_perms_sync/permissions/snapshot.py @@ -8,7 +8,7 @@ import logging import time from collections.abc import Callable, Iterable, Sequence -from concurrent.futures import ThreadPoolExecutor +from concurrent.futures import CancelledError, ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path from typing import Any, Literal, TextIO, TypeAlias, TypedDict, cast @@ -17,12 +17,31 @@ from ..shared import run_context from ..shared import types as shared_types +from . import apply as permissions_apply from . import sourcegraph as permissions_sourcegraph from . import types as permission_types log = logging.getLogger(__name__) +def _raise_if_capture_circuit_open(breaker: permissions_apply.CircuitBreaker) -> None: + """Abort a capture whose circuit breaker has opened. 
+ + Unlike the apply phases (which finish with an after-snapshot and exit 1), + a capture must raise: failed lookups are otherwise recorded as "no + grants", and a snapshot with silently-missing grants could later drive + an incorrect restore. + """ + if not breaker.is_open(): + return + raise RuntimeError( + "Permissions capture aborted: circuit breaker opened after " + f"{breaker.total_failures} failed grant lookup(s) " + f"({breaker.total_successes} succeeded). Refusing to build a " + "snapshot with missing grants. Re-run once the instance is healthy." + ) + + class RepoSnapshot(TypedDict): name: str users: list[str] @@ -196,6 +215,7 @@ def capture_explicit_grants( # Invert directly as each per-user fetch completes. Store only repo IDs # first, then hydrate each unique repo name once after all users complete. usernames_by_repository_id: dict[str, list[str]] = {} + breaker = permissions_apply.CircuitBreaker() def _fetch( batch_users: list[SnapshotUserInput], @@ -219,7 +239,9 @@ def _fetch( batch_size=explicit_permissions_batch_size, ) failures = 0 + breaker.record(success=True) except Exception as exception: + breaker.record(success=False) log.warning( "Failed to batch-fetch explicit grants for %d user(s): %s. " "Falling back to one query per user.", @@ -244,6 +266,11 @@ def _fetch_one_user_at_a_time( repository_ids_by_user_id: dict[str, list[str]] = {} failures = 0 for user in batch_users: + if breaker.is_open(): + # The whole capture is about to be aborted; don't grind + # through the rest of the batch (each lookup can burn + # minutes in retries against a saturated instance). 
+ break try: repository_ids_by_user_id[user["id"]] = ( permissions_sourcegraph.list_user_explicit_repo_ids( @@ -251,8 +278,10 @@ def _fetch_one_user_at_a_time( user["id"], ) ) + breaker.record(success=True) except Exception as exception: failures += 1 + breaker.record(success=False) log.warning( "Failed to fetch explicit grants for user=%s: %s", user["username"], @@ -332,11 +361,16 @@ def _record_completed_batch( ) -> None: nonlocal capture_failures, completed, scanned_user_count submitted_batch = result.item + if isinstance(result.exception, CancelledError): + # Cancelled by the circuit breaker opening; the capture is + # about to be aborted, so don't count these as scanned. + return completed += len(submitted_batch) scanned_user_count += len(submitted_batch) if result.exception is not None: # Don't blow up the whole capture; warn so the operator can # see the users whose grants were treated as empty. + breaker.record(success=False) capture_failures += len(submitted_batch) log.warning( "Failed to fetch explicit grants for %d user(s): %s", @@ -371,12 +405,14 @@ def _record_completed_batch( parallelism=parallelism, worker_pool=worker_pool, handle_result=_record_completed_batch, + should_stop=breaker.is_open, max_pending=max_pending_batches, ) _log_progress(force=True) capture_event["scanned_user_count"] = scanned_user_count if capture_failures: capture_event["user_permission_lookup_failures"] = capture_failures + _raise_if_capture_circuit_open(breaker) # Stable sort: users alphabetical within each repo. 
for usernames in usernames_by_repository_id.values(): @@ -528,8 +564,9 @@ def capture_user_scoped_explicit_grants( ) -> dict[str, UserScopedUserSnapshot]: """Capture explicit API grants for only the supplied users.""" scoped_users: dict[str, UserScopedUserSnapshot] = {} + breaker = permissions_apply.CircuitBreaker() - def _fetch(user: SnapshotUser) -> tuple[SnapshotUser, list[permission_types.Repository]]: + def _fetch(user: SnapshotUser) -> list[permission_types.Repository]: with src.span( "user_scoped_explicit_repos_fetch", level="DEBUG", @@ -538,36 +575,44 @@ def _fetch(user: SnapshotUser) -> tuple[SnapshotUser, list[permission_types.Repo ) as fetch_event: repos = permissions_sourcegraph.list_user_explicit_repos(client, user["id"]) fetch_event["repo_count"] = len(repos) - return user, repos - - def _fetch_or_empty( - user: SnapshotUser, - ) -> tuple[SnapshotUser, list[permission_types.Repository]]: - try: - return _fetch(user) - except Exception as exception: + return repos + + def _record_result( + result: run_context.ParallelResult[SnapshotUser, list[permission_types.Repository]], + ) -> None: + user = result.item + if isinstance(result.exception, CancelledError): + return + if result.exception is not None: + breaker.record(success=False) log.warning( "Failed to fetch scoped explicit grants for user=%s: %s", user["username"], - exception, + result.exception, ) - return user, [] + scoped_users[user["username"]] = {"id": user["id"], "repos": []} + return + breaker.record(success=True) + repos = result.value if result.value is not None else [] + scoped_users[user["username"]] = { + "id": user["id"], + "repos": sorted(repos, key=lambda repo: repo["name"]), + } with src.span("capture_user_scoped_explicit_grants") as capture_event: - for fetched_user, repos in run_context.parallel_map( - _fetch_or_empty, + run_context.parallel_process( + _fetch, users, parallelism=parallelism, worker_pool=worker_pool, - ): - scoped_users[fetched_user["username"]] = { - "id": 
fetched_user["id"], - "repos": sorted(repos, key=lambda repo: repo["name"]), - } + handle_result=_record_result, + should_stop=breaker.is_open, + ) capture_event["scanned_user_count"] = len(scoped_users) capture_event["total_grants"] = sum( len(user_snapshot["repos"]) for user_snapshot in scoped_users.values() ) + _raise_if_capture_circuit_open(breaker) return dict(sorted(scoped_users.items())) diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..60f2a52 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,82 @@ +# Testing + +All testing is driven by one entrypoint and one case registry: + +| Path | Purpose | +| ---- | ------- | +| [run.py](./run.py) | The single test entrypoint: `uv run tests/run.py [--local \| --live \| --performance]` | +| [tests.yaml](./tests.yaml) | The case registry: what each case runs, where, and what it must produce (see its header comment for the full schema) | +| [e2e/fixtures/](./e2e/fixtures/) | Per-case state files, in a directory matching the case name | +| [e2e/case_runner.py](./e2e/case_runner.py) | The case execution engine: registry loader, in-memory Sourcegraph instance (`FakeSourcegraphClient`), full-command runs for state cases, in-process parser replays for replay cases | +| [e2e/test_local_cases.py](./e2e/test_local_cases.py) | `unittest` entrypoint: runs every local-mode registry case and validates ALL registry entries (including live/performance ones) | +| [unit/](./unit/), [integration/](./integration/) | Plain `unittest` suites, run by the local tier's gate | + +## How the pieces fit + +```text +tests.yaml ──registry──▶ e2e/case_runner.py ◀──imports── run.py + ▲ (--local/--live/--performance) + │ + e2e/test_local_cases.py + (unittest discovery: local cases + registry validation) +``` + +- `case_runner.py` is a library, not a test module: it executes registry + cases without any network. Both consumers above import it. 
+- `test_local_cases.py` exists so plain `uv run python -m unittest discover + -s tests` asserts every local case with no orchestrator — which is exactly + what run.py's "unit + fixture tests" gate, the release checklist, and CI + run. +- Live and performance execution (instance prerequisites, seed → apply → + verify → restore, traces, sampling) lives only in `run.py`. + +## Files in a fixture case directory + +A directory is only needed when the case uses files — a read-only non-set +command can be registered in tests.yaml with no directory at all. + +- `before.json`: Full instance state before the run: providers, services, users, + repos with `explicitPermissionsUsers`. Required for local mode and for + mutating (`--apply`) live/performance runs. +- `maps.yaml`: The mapping rules under test (same format as the real + `maps.yaml`). Required for `set` commands that do not pass their own + `--maps-path`. +- `after.json`: Expected full instance state after the run (golden file). Omit + it for cases where state must NOT change (no-op and expected-error cases). + +Live-capable cases must use REAL test-instance users/repos in their fixture +files (e.g. `test_user_09991`, `test-repo-49981`), and exact selectors only +(`usernames:`/`emails:` for users, `names:` for repos). + +## What each mode does with a case + +- **local** — runs the case's `cliCommand` through the real argument parser + (and `importConfig` through the Python import API, when present) against an + in-memory instance built from `before.json`, then asserts the full + resulting state against `after.json`. Replay-style cases + (`expectedExitCode`/`expectedOutput`) assert parser behavior instead and + need no files. +- **live** — runs `cliCommand` against the `.env` test instance. Read-only + commands assert exit code and output. 
Mutating `set --apply` commands run + the full cycle: seed the `before.json` state onto the involved repos, run, + verify the result with an independent GraphQL read-back, then restore the + original state. Cases may declare `live.involvedRepos` (extra repos to + capture/seed/restore; the ones absent from `after.json` are canaries that + must come back unchanged — this is how widened regex selectors get caught) + and `live.usersWithoutOtherGrants` (preflight: named users must hold no + grants outside the involved repos). +- **performance** — same as live, but timed and measured (traces, RSS + sampling, TSV row). + +## Workflow for adding or editing a case + +1. Register the case in [tests.yaml](./tests.yaml); create the fixture + directory with any required files (`before.json`, `maps.yaml`). +2. Either write `after.json` by hand (strongest: states your intent), or run + `uv run tests/run.py --update-golden` to generate it from the actual + result. +3. **Review `after.json` carefully** — it is the assertion. Confirm every + added/removed grant is what you intended before committing. +4. Run `uv run tests/run.py` to confirm the suite passes. The unit tests + fail on unregistered fixture directories, missing required files, or + malformed registry entries. diff --git a/tests/__init__.py b/tests/__init__.py index c25293f..4a06fca 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1,12 @@ """Tests for src-auth-perms-sync.""" + +import logging + +# Unit tests deliberately exercise failure paths, which emit operator-facing +# WARNING/ERROR logs such as "FAIL ...". Without any configured +# handler, logging.lastResort prints those to stderr, where they masquerade +# as real test failures in test-runner output. A NullHandler keeps expected +# log noise out of test output; a log line saying FAIL should only ever mean +# a test actually failed. Tests that care about log output should assert it +# explicitly with assertLogs. 
+logging.getLogger().addHandler(logging.NullHandler()) diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py index 4a166fb..5dfe559 100644 --- a/tests/e2e/__init__.py +++ b/tests/e2e/__init__.py @@ -1 +1,11 @@ """Fixture-backed end-to-end tests.""" + +import logging + +# Tests exercise failure paths that emit operator-facing WARNING/ERROR logs +# (e.g. "FAIL ..."). Without a handler, logging.lastResort prints them +# to stderr, where they masquerade as real failures in test-runner output. A +# log line saying FAIL must only ever mean a test actually failed. Installed +# here (not only in tests/__init__.py) because unittest discovery imports +# these subpackages as top-level packages, skipping the parent package. +logging.getLogger().addHandler(logging.NullHandler()) diff --git a/tests/e2e/test_permission_fixture_cases.py b/tests/e2e/case_runner.py similarity index 64% rename from tests/e2e/test_permission_fixture_cases.py rename to tests/e2e/case_runner.py index 8742b8f..7315f23 100644 --- a/tests/e2e/test_permission_fixture_cases.py +++ b/tests/e2e/case_runner.py @@ -1,7 +1,21 @@ +"""Execution engine for the tests/tests.yaml case registry. + +Loads the registry, builds in-memory Sourcegraph instances from +fixture state files (FakeSourcegraphClient), and runs cases through +the real CLI code paths: full command runs for state cases, and +in-process argument-parser replays for replay-style cases. + +Consumed by tests/run.py (local checks and randomized invariants) and +by tests/e2e/test_local_cases.py (unittest discovery entrypoint). 
+""" + from __future__ import annotations +import contextlib +import io import json -import unittest +import shlex +import sys from collections.abc import Iterator, Mapping, Sequence from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass @@ -9,11 +23,14 @@ from typing import Any, NotRequired, TypedDict, cast import src_py_lib as src +import yaml from src_auth_perms_sync import cli from src_auth_perms_sync.shared import types as shared_types FIXTURES_DIR = Path(__file__).with_name("fixtures") +E2E_TESTS_PATH = Path(__file__).resolve().parents[1] / "tests.yaml" +DEFAULT_CASE_MODES = ["local"] SITE_CONFIG = json.dumps( { "permissions.userMapping": {"enabled": True, "bindID": "username"}, @@ -57,6 +74,7 @@ class FixtureRepo(TypedDict): name: str externalServiceID: int explicitPermissionsUsers: list[str] + createdAt: NotRequired[str] # default: 2026-01-01T00:00:00Z class FixtureState(TypedDict): @@ -68,17 +86,22 @@ class FixtureState(TypedDict): pendingBindIDs: list[str] -class FixtureSetOptions(TypedDict, total=False): - full: bool - users: list[str] - usersWithoutExplicitPerms: bool - createdAfter: str - - class FixtureCase(TypedDict): + """One entry under `cases:` in tests.yaml. See that file for docs.""" + description: str - set: FixtureSetOptions + modes: NotRequired[list[str]] # local, live, performance (default: [local]) + cliCommand: NotRequired[str] # CLI arguments; --maps-path is appended for set + importConfig: NotRequired[dict[str, Any]] # Python-import-mode Config fields expectedMutations: NotRequired[int] + # When set, the command must fail, every listed substring must appear in + # the failure text, and the instance state must be left unchanged. + expectedErrors: NotRequired[list[str]] + # Either of these makes the case replay-style: assert exit code and + # output substrings instead of instance state. Locally, replay cases run + # the real argument parser in-process and need no fixture files. 
+ expectedExitCode: NotRequired[int] + expectedOutput: NotRequired[list[str]] @dataclass(frozen=True, slots=True) @@ -102,9 +125,13 @@ class FixtureRunResult: expected_state: FixtureState actual_state: FixtureState command_failure: str | None = None + expected_errors: tuple[str, ...] = () + runner: str = "cli" # "cli" (parsed argv) or "import" (programmatic Config) @property def failure(self) -> str | None: + if self.expected_errors: + return self._expected_error_failure() if self.command_failure is not None: return self.command_failure if self.expected_state != self.actual_state: @@ -113,6 +140,21 @@ def failure(self) -> str | None: return f"expected {self.expected_mutations} mutation(s), got {self.actual_mutations}" return None + def _expected_error_failure(self) -> str | None: + if self.command_failure is None: + return "expected the command to fail validation, but it succeeded" + missing = [ + expected for expected in self.expected_errors if expected not in self.command_failure + ] + if missing: + return ( + f"command failure did not contain expected error(s) {missing}; " + f"got: {self.command_failure}" + ) + if self.expected_state != self.actual_state: + return "state changed during a run that was expected to fail validation" + return None + @property def passed(self) -> bool: return self.failure is None @@ -182,6 +224,15 @@ def graphql( return {"node": self._graphql_user_by_id(variable_values["id"])} if "query SiteUsers" in query: return {"site": {"users": self._site_users(variable_values)}} + if "query UserExplicitRepoExistsBatch" in query: + batch_data: dict[str, Any] = {} + index = 0 + while f"user{index}" in variable_values: + batch_data[f"user{index}"] = self._user_explicit_repo_exists( + variable_values[f"user{index}"] + ) + index += 1 + return batch_data if "query UserExplicitRepoExists" in query: return {"node": self._user_explicit_repo_exists(variable_values["id"])} if "query UserExplicitReposBatch" in query: @@ -221,7 +272,9 @@ def 
stream_connection_nodes( if path == ("externalServices",): return iter(self._graphql_external_services()) if path == ("repositories",): - return iter(self._repositories_for_external_service(variable_values["esID"])) + if "esID" in variable_values: + return iter(self._repositories_for_external_service(variable_values["esID"])) + return iter(self._repository_candidates(variable_values)) if path == ("node", "permissionsInfo", "repositories"): return iter(self._explicit_repository_nodes_for_user(variable_values["id"])) raise AssertionError(f"Unhandled fixture connection path: {path}") @@ -336,6 +389,41 @@ def _explicit_repository_nodes_for_user(self, user_id_value: object) -> list[dic if username in self._permissions_by_repository_id[repository["id"]] ] + def _repository_candidates(self, variables: dict[str, object]) -> list[dict[str, Any]]: + """Serve the repository-candidate queries (by names, all, by created-at). + + The created-at variant orders newest-first server-side and is filtered + client-side by the CLI, so no date filtering happens here. + """ + repositories = self._repos + names_value = variables.get("names") + if isinstance(names_value, list): + wanted_names = set(cast("list[str]", names_value)) + repositories = [ + repository for repository in repositories if repository["name"] in wanted_names + ] + else: + # The created-at candidate query returns newest first, and the CLI + # stops streaming at the first repo older than the threshold. 
+ repositories = sorted( + repositories, + key=lambda repository: repository.get("createdAt", "2026-01-01T00:00:00Z"), + reverse=True, + ) + return [ + { + "id": self._repository_graphql_id(repository["id"]), + "name": repository["name"], + "createdAt": repository.get("createdAt", "2026-01-01T00:00:00Z"), + "externalServices": { + "nodes": [ + {"id": self._external_service_graphql_id(repository["externalServiceID"])} + ] + }, + } + for repository in repositories + ] + def _site_users(self, variables: dict[str, object]) -> dict[str, Any]: created_at_filter = variables.get("createdAt") created_after: str | None = None @@ -446,23 +534,162 @@ def _external_service_graphql_id(self, external_service_id: int) -> str: return src.encode_sourcegraph_node_id("ExternalService", external_service_id) -def fixture_case_dirs() -> list[Path]: - return sorted(path for path in FIXTURES_DIR.iterdir() if path.is_dir()) +def load_e2e_cases() -> dict[str, FixtureCase]: + """Load the case registry from tests.yaml, keyed by fixture dir name.""" + raw = cast("dict[str, Any]", yaml.safe_load(E2E_TESTS_PATH.read_text(encoding="utf-8"))) + return cast("dict[str, FixtureCase]", raw["cases"]) + + +def case_modes(case: FixtureCase) -> list[str]: + return case.get("modes", DEFAULT_CASE_MODES) + + +def case_runners(case: FixtureCase) -> list[str]: + """Return how a case runs in local mode: parsed argv and/or import API.""" + runners: list[str] = [] + if "cliCommand" in case: + runners.append("cli") + if "importConfig" in case: + runners.append("import") + return runners + + +def case_cli_arguments(case: FixtureCase, case_name: str) -> list[str]: + """Return cliCommand as argv, appending the case's maps file for set commands.""" + cli_command = case.get("cliCommand") + if cli_command is None: + raise ValueError(f"case {case_name!r} has no cliCommand") + argv = shlex.split(cli_command) + if argv and argv[0] == "set" and "--maps-path" not in argv: + argv += ["--maps-path", str(FIXTURES_DIR / case_name 
/ "maps.yaml")] + return argv + + +def is_replay_case(case: FixtureCase) -> bool: + """Replay-style cases assert exit code and output rather than state.""" + return "expectedExitCode" in case or "expectedOutput" in case + +def expected_exit_code(case: FixtureCase) -> int: + return case.get("expectedExitCode", 1 if case.get("expectedErrors") else 0) -def run_fixture_case(case_dir: Path) -> FixtureRunResult: - case = load_case(case_dir / "case.json") + +def run_local_replay_case(case_name: str) -> str: + """Run one replay case through the real argument parser in-process. + + Covers parse-level behavior: argument rejection (exit 2), --help (exit 0), + and config validation errors. Returns a failure detail, or "" on success. + """ + case = load_e2e_cases()[case_name] + argv = case_cli_arguments(case, case_name) + # A bare invocation (empty cliCommand) must stay bare: appending + # credential flags would change the parse error under test. + if argv and "--help" not in argv and "-h" not in argv: + argv += [ + "--src-endpoint", + "https://fixture.sourcegraph.test", + "--src-access-token", + "fixture-token", + ] + output_buffer = io.StringIO() + exit_code = 0 + # argparse derives the usage `prog` from sys.argv[0]; pin it to the real + # entrypoint name so replay output matches what operators see. 
+ original_argv0 = sys.argv[0] + sys.argv[0] = "src-auth-perms-sync" + try: + with contextlib.redirect_stdout(output_buffer), contextlib.redirect_stderr(output_buffer): + try: + cli.load_cli(argv) + except SystemExit as exception: + exit_code = exception.code if isinstance(exception.code, int) else 1 + finally: + sys.argv[0] = original_argv0 + output = output_buffer.getvalue() + expected_exit = expected_exit_code(case) + if exit_code != expected_exit: + return f"expected exit {expected_exit}, got {exit_code}; output: {output[-300:]!r}" + for substring in [*case.get("expectedOutput", []), *case.get("expectedErrors", [])]: + if substring not in output: + return f"output did not contain {substring!r}; output: {output[-300:]!r}" + return "" + + +def required_case_files(case: FixtureCase) -> set[str]: + """Return which files a case's fixture directory must contain. + + The directory itself is optional: a read-only non-set command needs no + files at all. before.json is needed wherever instance state is built + (local mode, and mutating live/performance runs); maps.yaml is needed by + set commands that do not pass their own --maps-path / maps_path. + Replay-style cases never get past argument parsing locally, so they need + no files. 
+ """ + files: set[str] = set() + if is_replay_case(case): + return files + modes = case_modes(case) + argv = shlex.split(case["cliCommand"]) if "cliCommand" in case else [] + import_config = case.get("importConfig") + if "local" in modes: + files.add("before.json") + if ({"live", "performance"} & set(modes)) and "--apply" in argv: + files.add("before.json") + if argv[:1] == ["set"] and "--maps-path" not in argv: + files.add("maps.yaml") + if ( + import_config is not None + and import_config.get("command") == "set" + and "maps_path" not in import_config + ): + files.add("maps.yaml") + return files + + +def cli_input_for_case( + case: FixtureCase, case_name: str, endpoint: str, runner: str +) -> cli.CliInput: + """Build the parsed command for one case, via argv or the import API.""" + if runner == "cli": + argv = case_cli_arguments(case, case_name) + argv += ["--src-endpoint", endpoint, "--src-access-token", "fixture-token"] + return cli.load_cli(argv) + import_config = case.get("importConfig") + if import_config is None: + raise ValueError(f"case {case_name!r} has no importConfig") + options = dict(import_config) + command_name = cast(cli.CommandName, options.pop("command")) + updates: dict[str, object] = { + name: tuple(cast("list[object]", value)) if isinstance(value, list) else value + for name, value in options.items() + } + if command_name == "set" and "maps_path" not in updates: + updates["maps_path"] = FIXTURES_DIR / case_name / "maps.yaml" + config = cli.Config( + src_endpoint=endpoint, + src_access_token="fixture-token", + ).model_copy(update=updates) + return cli.CliInput(command_name=command_name, config=config) + + +def run_fixture_case(case_name: str, runner: str = "cli") -> FixtureRunResult: + case = load_e2e_cases()[case_name] + case_dir = FIXTURES_DIR / case_name before_state = load_state(case_dir / "before.json") - expected_state = FakeSourcegraphClient(load_state(case_dir / "after.json")).export_state() + # after.json is optional: cases that must 
not change anything (no-op and + # expected-validation-error cases) compare against the before state. + after_path = case_dir / "after.json" + expected_source = after_path if after_path.is_file() else case_dir / "before.json" + expected_state = FakeSourcegraphClient(load_state(expected_source)).export_state() client = FakeSourcegraphClient(before_state) command_failure: str | None = None try: - config = config_for_case(case, case_dir / "maps.yaml", client.endpoint) - command = cli.resolve_command("set", config) - with ThreadPoolExecutor(max_workers=config.parallelism) as worker_pool: + cli_input = cli_input_for_case(case, case_name, client.endpoint, runner) + command = cli.resolve_command(cli_input.command_name, cli_input.config) + with ThreadPoolExecutor(max_workers=cli_input.config.parallelism) as worker_pool: cli.run_command( - config, + cli_input.config, command, cast(src.SourcegraphClient, client), worker_pool, @@ -474,7 +701,7 @@ def run_fixture_case(case_dir: Path) -> FixtureRunResult: actual_state = client.export_state() return FixtureRunResult( - name=case_dir.name, + name=case_name, description=case["description"], before_counts=state_counts(before_state), expected_counts=state_counts(expected_state), @@ -486,6 +713,8 @@ def run_fixture_case(case_dir: Path) -> FixtureRunResult: expected_state=expected_state, actual_state=actual_state, command_failure=command_failure, + expected_errors=tuple(case.get("expectedErrors", [])), + runner=runner, ) @@ -516,44 +745,5 @@ def repo_permission_users_by_id(state: FixtureState) -> dict[int, tuple[str, ... 
} -def config_for_case(case: FixtureCase, maps_path: Path, endpoint: str) -> cli.Config: - set_options = case["set"] - updates: dict[str, object] = { - "maps_path": maps_path, - "apply": True, - "no_backup": True, - "parallelism": 1, - "full": bool(set_options.get("full", False)), - "users": tuple(set_options.get("users", [])), - "users_without_explicit_perms": bool(set_options.get("usersWithoutExplicitPerms", False)), - "created_after": set_options.get("createdAfter"), - } - return cli.Config( - src_endpoint=endpoint, - src_access_token="fixture-token", - ).model_copy(update=updates) - - -def load_case(path: Path) -> FixtureCase: - return cast(FixtureCase, json.loads(path.read_text(encoding="utf-8"))) - - def load_state(path: Path) -> FixtureState: return cast(FixtureState, json.loads(path.read_text(encoding="utf-8"))) - - -class PermissionFixtureCaseTests(unittest.TestCase): - maxDiff = None - - def test_permission_fixture_cases(self) -> None: - for case_dir in fixture_case_dirs(): - with self.subTest(case=case_dir.name): - result = run_fixture_case(case_dir) - self.assertIsNone(result.command_failure) - self.assertEqual(result.expected_state, result.actual_state) - if result.expected_mutations is not None: - self.assertEqual(result.expected_mutations, result.actual_mutations) - - -if __name__ == "__main__": - unittest.main(verbosity=2) diff --git a/tests/e2e/fixtures/add-users-preserves-existing/after.json b/tests/e2e/fixtures/add-users-preserves-existing/after.json index 6e9e1ab..14cb19c 100644 --- a/tests/e2e/fixtures/add-users-preserves-existing/after.json +++ b/tests/e2e/fixtures/add-users-preserves-existing/after.json @@ -22,41 +22,62 @@ "users": [ { "id": 1, - "username": "alice", + "username": "test_user_09991", "builtinAuth": true, "createdAt": "2026-01-01T00:00:00Z", - "emails": [{ "email": "alice@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], 
"externalAccounts": [] }, { "id": 2, - "username": "bob", + "username": "test_user_09992", "builtinAuth": true, "createdAt": "2026-01-02T00:00:00Z", - "emails": [{ "email": "bob@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] }, { "id": 3, - "username": "carol", + "username": "test_user_09993", "builtinAuth": true, "createdAt": "2026-01-03T00:00:00Z", - "emails": [{ "email": "carol@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] } ], "repos": [ { "id": 101, - "name": "github.com/acme/app", + "name": "test-repo-49981", "externalServiceID": 1, - "explicitPermissionsUsers": ["alice", "bob"] + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] }, { "id": 102, - "name": "github.com/acme/lib", + "name": "test-repo-49982", "externalServiceID": 1, - "explicitPermissionsUsers": ["bob", "carol"] + "explicitPermissionsUsers": [ + "test_user_09992", + "test_user_09993" + ] } ], "pendingBindIDs": [] diff --git a/tests/e2e/fixtures/add-users-preserves-existing/before.json b/tests/e2e/fixtures/add-users-preserves-existing/before.json index 901bd3a..0477656 100644 --- a/tests/e2e/fixtures/add-users-preserves-existing/before.json +++ b/tests/e2e/fixtures/add-users-preserves-existing/before.json @@ -22,41 +22,60 @@ "users": [ { "id": 1, - "username": "alice", + "username": "test_user_09991", "builtinAuth": true, "createdAt": "2026-01-01T00:00:00Z", - "emails": [{ "email": "alice@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] }, { "id": 2, - "username": "bob", + "username": "test_user_09992", "builtinAuth": true, "createdAt": "2026-01-02T00:00:00Z", - "emails": [{ "email": 
"bob@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] }, { "id": 3, - "username": "carol", + "username": "test_user_09993", "builtinAuth": true, "createdAt": "2026-01-03T00:00:00Z", - "emails": [{ "email": "carol@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] } ], "repos": [ { "id": 101, - "name": "github.com/acme/app", + "name": "test-repo-49981", "externalServiceID": 1, - "explicitPermissionsUsers": ["alice"] + "explicitPermissionsUsers": [ + "test_user_09991" + ] }, { "id": 102, - "name": "github.com/acme/lib", + "name": "test-repo-49982", "externalServiceID": 1, - "explicitPermissionsUsers": ["carol"] + "explicitPermissionsUsers": [ + "test_user_09993" + ] } ], "pendingBindIDs": [] diff --git a/tests/e2e/fixtures/add-users-preserves-existing/case.json b/tests/e2e/fixtures/add-users-preserves-existing/case.json deleted file mode 100644 index 37967f2..0000000 --- a/tests/e2e/fixtures/add-users-preserves-existing/case.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "description": "Additive --users mode grants mapped repos to one user without dropping existing repo users.", - "set": { - "users": ["bob"] - }, - "expectedMutations": 2 -} diff --git a/tests/e2e/fixtures/add-users-preserves-existing/maps.yaml b/tests/e2e/fixtures/add-users-preserves-existing/maps.yaml index 06de7d9..c903bfc 100644 --- a/tests/e2e/fixtures/add-users-preserves-existing/maps.yaml +++ b/tests/e2e/fixtures/add-users-preserves-existing/maps.yaml @@ -2,9 +2,9 @@ maps: - name: Grant app and lib to selected users users: usernames: - - alice - - bob + - test_user_09991 + - test_user_09992 repos: names: - - github.com/acme/app - - github.com/acme/lib + - test-repo-49981 + - test-repo-49982 diff --git 
a/tests/e2e/fixtures/and-filters-intersect/after.json b/tests/e2e/fixtures/and-filters-intersect/after.json new file mode 100644 index 0000000..c7e6f0b --- /dev/null +++ b/tests/e2e/fixtures/and-filters-intersect/after.json @@ -0,0 +1,103 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + 
"test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/and-filters-intersect/before.json b/tests/e2e/fixtures/and-filters-intersect/before.json new file mode 100644 index 0000000..6ef40cd --- /dev/null +++ b/tests/e2e/fixtures/and-filters-intersect/before.json @@ -0,0 +1,101 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + } + ], + "repos": [ + { + "id": 101, + "name": 
"test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/and-filters-intersect/maps.yaml b/tests/e2e/fixtures/and-filters-intersect/maps.yaml new file mode 100644 index 0000000..718283e --- /dev/null +++ b/tests/e2e/fixtures/and-filters-intersect/maps.yaml @@ -0,0 +1,11 @@ +maps: + - name: Engineering members on the allowlist get test-repo-49981 + users: + authProvider: + configID: okta + samlGroup: engineering + emails: + - marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json new file mode 100644 index 0000000..5485d5b --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json @@ -0,0 +1,75 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": 
"marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json new file mode 100644 index 0000000..49449d7 --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json @@ -0,0 +1,75 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09993" + ] + } + ], + "pendingBindIDs": [] +} diff 
--git a/tests/e2e/fixtures/full-overwrite-removes-stale-grant/maps.yaml b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/maps.yaml new file mode 100644 index 0000000..4876aa1 --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/maps.yaml @@ -0,0 +1,9 @@ +maps: + - name: test_user_09991 and test_user_09992 own test-repo-49981 + users: + usernames: + - test_user_09991 + - test_user_09992 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/full-overwrite-unions/after.json b/tests/e2e/fixtures/full-overwrite-unions/after.json index 639f441..6f86372 100644 --- a/tests/e2e/fixtures/full-overwrite-unions/after.json +++ b/tests/e2e/fixtures/full-overwrite-unions/after.json @@ -22,47 +22,69 @@ "users": [ { "id": 1, - "username": "alice", + "username": "test_user_09991", "builtinAuth": true, "createdAt": "2026-01-01T00:00:00Z", - "emails": [{ "email": "alice@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] }, { "id": 2, - "username": "bob", + "username": "test_user_09992", "builtinAuth": true, "createdAt": "2026-01-02T00:00:00Z", - "emails": [{ "email": "bob@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] }, { "id": 3, - "username": "mallory", + "username": "test_user_09994", "builtinAuth": true, "createdAt": "2026-01-03T00:00:00Z", - "emails": [{ "email": "mallory@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09994-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] } ], "repos": [ { "id": 101, - "name": "github.com/acme/app", + "name": "test-repo-49981", "externalServiceID": 1, - "explicitPermissionsUsers": ["alice"] + "explicitPermissionsUsers": [ + "test_user_09991" + ] }, { "id": 102, - "name": 
"github.com/acme/lib", + "name": "test-repo-49982", "externalServiceID": 1, - "explicitPermissionsUsers": ["alice", "bob"] + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] }, { "id": 103, - "name": "github.com/acme/archive", + "name": "test-repo-49983", "externalServiceID": 1, - "explicitPermissionsUsers": ["mallory"] + "explicitPermissionsUsers": [ + "test_user_09994" + ] } ], "pendingBindIDs": [] diff --git a/tests/e2e/fixtures/full-overwrite-unions/before.json b/tests/e2e/fixtures/full-overwrite-unions/before.json index 0aae9d6..d929f39 100644 --- a/tests/e2e/fixtures/full-overwrite-unions/before.json +++ b/tests/e2e/fixtures/full-overwrite-unions/before.json @@ -22,47 +22,68 @@ "users": [ { "id": 1, - "username": "alice", + "username": "test_user_09991", "builtinAuth": true, "createdAt": "2026-01-01T00:00:00Z", - "emails": [{ "email": "alice@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] }, { "id": 2, - "username": "bob", + "username": "test_user_09992", "builtinAuth": true, "createdAt": "2026-01-02T00:00:00Z", - "emails": [{ "email": "bob@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] }, { "id": 3, - "username": "mallory", + "username": "test_user_09994", "builtinAuth": true, "createdAt": "2026-01-03T00:00:00Z", - "emails": [{ "email": "mallory@example.com", "verified": true }], + "emails": [ + { + "email": "marc.leblanc+test09994-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], "externalAccounts": [] } ], "repos": [ { "id": 101, - "name": "github.com/acme/app", + "name": "test-repo-49981", "externalServiceID": 1, - "explicitPermissionsUsers": ["mallory"] + "explicitPermissionsUsers": [ + "test_user_09994" + ] }, { "id": 102, - "name": "github.com/acme/lib", 
+ "name": "test-repo-49982", "externalServiceID": 1, - "explicitPermissionsUsers": ["mallory"] + "explicitPermissionsUsers": [ + "test_user_09994" + ] }, { "id": 103, - "name": "github.com/acme/archive", + "name": "test-repo-49983", "externalServiceID": 1, - "explicitPermissionsUsers": ["mallory"] + "explicitPermissionsUsers": [ + "test_user_09994" + ] } ], "pendingBindIDs": [] diff --git a/tests/e2e/fixtures/full-overwrite-unions/case.json b/tests/e2e/fixtures/full-overwrite-unions/case.json deleted file mode 100644 index 88a865d..0000000 --- a/tests/e2e/fixtures/full-overwrite-unions/case.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "description": "Full set mode unions users across rules, overwrites mapped repos, and leaves unmapped repos alone.", - "set": { - "full": true - }, - "expectedMutations": 2 -} diff --git a/tests/e2e/fixtures/full-overwrite-unions/maps.yaml b/tests/e2e/fixtures/full-overwrite-unions/maps.yaml index 2665776..897196a 100644 --- a/tests/e2e/fixtures/full-overwrite-unions/maps.yaml +++ b/tests/e2e/fixtures/full-overwrite-unions/maps.yaml @@ -1,16 +1,16 @@ maps: - - name: Alice owns app and lib + - name: test_user_09991 owns test-repo-49981 and test-repo-49982 users: usernames: - - alice + - test_user_09991 repos: names: - - github.com/acme/app - - github.com/acme/lib - - name: Bob also owns lib + - test-repo-49981 + - test-repo-49982 + - name: test_user_09992 also owns test-repo-49982 users: usernames: - - bob + - test_user_09992 repos: names: - - github.com/acme/lib + - test-repo-49982 diff --git a/tests/e2e/fixtures/invalid-bad-regex/before.json b/tests/e2e/fixtures/invalid-bad-regex/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ b/tests/e2e/fixtures/invalid-bad-regex/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": 
"" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-bad-regex/maps.yaml b/tests/e2e/fixtures/invalid-bad-regex/maps.yaml new file mode 100644 index 0000000..c8330c2 --- /dev/null +++ b/tests/e2e/fixtures/invalid-bad-regex/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Broken regex + users: + usernameRegexes: + - '[unclosed' + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/invalid-missing-repos-section/before.json b/tests/e2e/fixtures/invalid-missing-repos-section/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ b/tests/e2e/fixtures/invalid-missing-repos-section/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + 
"explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-missing-repos-section/maps.yaml b/tests/e2e/fixtures/invalid-missing-repos-section/maps.yaml new file mode 100644 index 0000000..d3641bc --- /dev/null +++ b/tests/e2e/fixtures/invalid-missing-repos-section/maps.yaml @@ -0,0 +1,5 @@ +maps: + - name: No repos section + users: + usernames: + - test_user_09991 diff --git a/tests/e2e/fixtures/invalid-set-created-after-date/before.json b/tests/e2e/fixtures/invalid-set-created-after-date/before.json new file mode 100644 index 0000000..568cbbe --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-created-after-date/before.json @@ -0,0 +1,46 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-set-created-after-date/maps.yaml b/tests/e2e/fixtures/invalid-set-created-after-date/maps.yaml new file mode 100644 index 0000000..945a7fa --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-created-after-date/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Valid rule that must never run because the date value is rejected + users: + usernames: + - test_user_09991 + repos: + names: + - test-repo-49981 diff --git 
a/tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json b/tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json new file mode 100644 index 0000000..568cbbe --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json @@ -0,0 +1,46 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-set-repos-created-after-date/maps.yaml b/tests/e2e/fixtures/invalid-set-repos-created-after-date/maps.yaml new file mode 100644 index 0000000..945a7fa --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-repos-created-after-date/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Valid rule that must never run because the date value is rejected + users: + usernames: + - test_user_09991 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/invalid-unknown-selector-field/before.json b/tests/e2e/fixtures/invalid-unknown-selector-field/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ b/tests/e2e/fixtures/invalid-unknown-selector-field/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": 
"Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-unknown-selector-field/maps.yaml b/tests/e2e/fixtures/invalid-unknown-selector-field/maps.yaml new file mode 100644 index 0000000..9a5c29b --- /dev/null +++ b/tests/e2e/fixtures/invalid-unknown-selector-field/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Typo in users selector + users: + userNames: + - test_user_09991 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/no-match-noop/before.json b/tests/e2e/fixtures/no-match-noop/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ b/tests/e2e/fixtures/no-match-noop/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": 
"test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/no-match-noop/maps.yaml b/tests/e2e/fixtures/no-match-noop/maps.yaml new file mode 100644 index 0000000..a5d1598 --- /dev/null +++ b/tests/e2e/fixtures/no-match-noop/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: No user matches this rule + users: + usernames: + - username_doesnt_exist_01 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/regex-filters-scope/after.json b/tests/e2e/fixtures/regex-filters-scope/after.json new file mode 100644 index 0000000..6f704d7 --- /dev/null +++ b/tests/e2e/fixtures/regex-filters-scope/after.json @@ -0,0 +1,97 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + }, + { + "id": 2, + "kind": "BITBUCKETSERVER", + "displayName": "Bitbucket LOB1", + "url": "https://bitbucket.test/", + "config": "{\"username\": \"LOB1-SA1\"}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": 
"marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + }, + { + "id": 201, + "name": "test-repo-00010", + "externalServiceID": 2, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/regex-filters-scope/before.json b/tests/e2e/fixtures/regex-filters-scope/before.json new file mode 100644 index 0000000..6785f7a --- /dev/null +++ b/tests/e2e/fixtures/regex-filters-scope/before.json @@ -0,0 +1,91 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + }, + { + "id": 2, + "kind": "BITBUCKETSERVER", + "displayName": "Bitbucket LOB1", + "url": "https://bitbucket.test/", + "config": "{\"username\": \"LOB1-SA1\"}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": 
"2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + }, + { + "id": 201, + "name": "test-repo-00010", + "externalServiceID": 2, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/regex-filters-scope/maps.yaml b/tests/e2e/fixtures/regex-filters-scope/maps.yaml new file mode 100644 index 0000000..2870d2a --- /dev/null +++ b/tests/e2e/fixtures/regex-filters-scope/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Email-regex-matched users get test-repo-49981 and test-repo-49982 + users: + emailRegexes: + - '\+test0999[12]-' + repos: + nameRegexes: + - '^test-repo-4998[12]$' diff --git a/tests/e2e/fixtures/restore-missing-file/before.json b/tests/e2e/fixtures/restore-missing-file/before.json new file mode 100644 index 0000000..2612a8e --- /dev/null +++ b/tests/e2e/fixtures/restore-missing-file/before.json @@ -0,0 +1,49 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ 
+ "test_user_09991" + ], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/saml-group-filter/after.json b/tests/e2e/fixtures/saml-group-filter/after.json new file mode 100644 index 0000000..15a6abd --- /dev/null +++ b/tests/e2e/fixtures/saml-group-filter/after.json @@ -0,0 +1,140 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + }, + { + "id": 2, + "kind": "BITBUCKETSERVER", + "displayName": "Bitbucket LOB1", + "url": "https://bitbucket.test/", + "config": "{\"username\": \"LOB1-SA1\"}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + }, + { + "Value": "lob1" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": 
"https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + }, + { + "id": 201, + "name": "test-repo-49982", + "externalServiceID": 2, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + }, + { + "id": 202, + "name": "test-repo-49983", + "externalServiceID": 2, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/saml-group-filter/before.json b/tests/e2e/fixtures/saml-group-filter/before.json new file mode 100644 index 0000000..4fd2193 --- /dev/null +++ b/tests/e2e/fixtures/saml-group-filter/before.json @@ -0,0 +1,136 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + }, + { + "id": 2, + "kind": "BITBUCKETSERVER", + "displayName": "Bitbucket LOB1", + "url": "https://bitbucket.test/", + "config": "{\"username\": \"LOB1-SA1\"}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + 
"email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + }, + { + "Value": "lob1" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + }, + { + "id": 201, + "name": "test-repo-49982", + "externalServiceID": 2, + "explicitPermissionsUsers": [] + }, + { + "id": 202, + "name": "test-repo-49983", + "externalServiceID": 2, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/saml-group-filter/maps.yaml b/tests/e2e/fixtures/saml-group-filter/maps.yaml new file mode 100644 index 0000000..676a745 --- /dev/null +++ b/tests/e2e/fixtures/saml-group-filter/maps.yaml @@ -0,0 +1,9 @@ +maps: + - name: Okta lob1 members get Bitbucket repos + users: + authProvider: + configID: okta + samlGroup: lob1 + repos: + codeHostConnection: + kind: BITBUCKETSERVER diff --git 
a/tests/e2e/fixtures/set-repos-created-after-noop/before.json b/tests/e2e/fixtures/set-repos-created-after-noop/before.json new file mode 100644 index 0000000..aeb72a8 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-created-after-noop/before.json @@ -0,0 +1,49 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09941", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09941-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49941", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09941" + ], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-repos-created-after-noop/maps.yaml b/tests/e2e/fixtures/set-repos-created-after-noop/maps.yaml new file mode 100644 index 0000000..f481bdd --- /dev/null +++ b/tests/e2e/fixtures/set-repos-created-after-noop/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Mapped repos for whoever the date filter selects + users: + usernames: + - test_user_09941 + repos: + names: + - test-repo-49941 diff --git a/tests/e2e/fixtures/set-repos-created-after/after.json b/tests/e2e/fixtures/set-repos-created-after/after.json new file mode 100644 index 0000000..d8add10 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-created-after/after.json @@ -0,0 +1,70 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", 
+ "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ], + "createdAt": "2026-03-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-repos-created-after/before.json b/tests/e2e/fixtures/set-repos-created-after/before.json new file mode 100644 index 0000000..0c5d31b --- /dev/null +++ b/tests/e2e/fixtures/set-repos-created-after/before.json @@ -0,0 +1,67 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + 
"externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-03-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-repos-created-after/maps.yaml b/tests/e2e/fixtures/set-repos-created-after/maps.yaml new file mode 100644 index 0000000..e1f4c58 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-created-after/maps.yaml @@ -0,0 +1,10 @@ +maps: + - name: Both users own test-repo-49981 and test-repo-49982 + users: + usernames: + - test_user_09991 + - test_user_09992 + repos: + names: + - test-repo-49981 + - test-repo-49982 diff --git a/tests/e2e/fixtures/set-repos-filter/after.json b/tests/e2e/fixtures/set-repos-filter/after.json new file mode 100644 index 0000000..b6f2af0 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-filter/after.json @@ -0,0 +1,72 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09971", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09971-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": 
"test_user_09972", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09972-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49971", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09971", + "test_user_09972" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49972", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09974" + ], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-repos-filter/before.json b/tests/e2e/fixtures/set-repos-filter/before.json new file mode 100644 index 0000000..5c92ffc --- /dev/null +++ b/tests/e2e/fixtures/set-repos-filter/before.json @@ -0,0 +1,71 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09971", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09971-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09972", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09972-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49971", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09974" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 
102, + "name": "test-repo-49972", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09974" + ], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-repos-filter/maps.yaml b/tests/e2e/fixtures/set-repos-filter/maps.yaml new file mode 100644 index 0000000..856fad5 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-filter/maps.yaml @@ -0,0 +1,10 @@ +maps: + - name: Both users own test-repo-49971 and test-repo-49972 + users: + usernames: + - test_user_09971 + - test_user_09972 + repos: + names: + - test-repo-49971 + - test-repo-49972 diff --git a/tests/e2e/fixtures/set-repos-without-explicit-perms/after.json b/tests/e2e/fixtures/set-repos-without-explicit-perms/after.json new file mode 100644 index 0000000..2b3a4e7 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-without-explicit-perms/after.json @@ -0,0 +1,72 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, 
+ { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-repos-without-explicit-perms/before.json b/tests/e2e/fixtures/set-repos-without-explicit-perms/before.json new file mode 100644 index 0000000..29b5a24 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-without-explicit-perms/before.json @@ -0,0 +1,69 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-repos-without-explicit-perms/maps.yaml b/tests/e2e/fixtures/set-repos-without-explicit-perms/maps.yaml new file mode 100644 index 0000000..e1f4c58 --- /dev/null +++ 
b/tests/e2e/fixtures/set-repos-without-explicit-perms/maps.yaml @@ -0,0 +1,10 @@ +maps: + - name: Both users own test-repo-49981 and test-repo-49982 + users: + usernames: + - test_user_09991 + - test_user_09992 + repos: + names: + - test-repo-49981 + - test-repo-49982 diff --git a/tests/e2e/fixtures/set-users-created-after-noop/before.json b/tests/e2e/fixtures/set-users-created-after-noop/before.json new file mode 100644 index 0000000..70f5790 --- /dev/null +++ b/tests/e2e/fixtures/set-users-created-after-noop/before.json @@ -0,0 +1,49 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09951", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09951-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49951", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09951" + ], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-users-created-after-noop/maps.yaml b/tests/e2e/fixtures/set-users-created-after-noop/maps.yaml new file mode 100644 index 0000000..0a7ea85 --- /dev/null +++ b/tests/e2e/fixtures/set-users-created-after-noop/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Mapped repos for whoever the date filter selects + users: + usernames: + - test_user_09951 + repos: + names: + - test-repo-49951 diff --git a/tests/e2e/fixtures/set-users-created-after/after.json b/tests/e2e/fixtures/set-users-created-after/after.json new file mode 100644 index 0000000..8f4f705 
--- /dev/null +++ b/tests/e2e/fixtures/set-users-created-after/after.json @@ -0,0 +1,85 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-02-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-03-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992", + "test_user_09993" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09992", + "test_user_09993" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-users-created-after/before.json b/tests/e2e/fixtures/set-users-created-after/before.json new file mode 100644 index 0000000..3cb9a19 --- /dev/null +++ b/tests/e2e/fixtures/set-users-created-after/before.json @@ -0,0 +1,80 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": 
"builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-02-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-03-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-users-created-after/maps.yaml b/tests/e2e/fixtures/set-users-created-after/maps.yaml new file mode 100644 index 0000000..6cfd734 --- /dev/null +++ b/tests/e2e/fixtures/set-users-created-after/maps.yaml @@ -0,0 +1,9 @@ +maps: + - name: Everyone gets the mapped repos + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 + - test-repo-49982 diff --git a/tests/e2e/fixtures/set-users-sync-saml-orgs-dry-run/maps.yaml b/tests/e2e/fixtures/set-users-sync-saml-orgs-dry-run/maps.yaml new file mode 100644 index 0000000..9873788 --- /dev/null +++ 
b/tests/e2e/fixtures/set-users-sync-saml-orgs-dry-run/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Everyone may read the canary repo + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/set-users-without-explicit-perms/after.json b/tests/e2e/fixtures/set-users-without-explicit-perms/after.json new file mode 100644 index 0000000..29ea002 --- /dev/null +++ b/tests/e2e/fixtures/set-users-without-explicit-perms/after.json @@ -0,0 +1,72 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09961", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09961-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09962", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09962-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49961", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09961", + "test_user_09962" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49962", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09962" + ], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-users-without-explicit-perms/before.json b/tests/e2e/fixtures/set-users-without-explicit-perms/before.json new file mode 100644 index 
0000000..cd010a7 --- /dev/null +++ b/tests/e2e/fixtures/set-users-without-explicit-perms/before.json @@ -0,0 +1,69 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09961", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09961-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09962", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09962-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49961", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09961" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49962", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-users-without-explicit-perms/maps.yaml b/tests/e2e/fixtures/set-users-without-explicit-perms/maps.yaml new file mode 100644 index 0000000..3ac4326 --- /dev/null +++ b/tests/e2e/fixtures/set-users-without-explicit-perms/maps.yaml @@ -0,0 +1,10 @@ +maps: + - name: Both users own test-repo-49961 and test-repo-49962 + users: + usernames: + - test_user_09961 + - test_user_09962 + repos: + names: + - test-repo-49961 + - test-repo-49962 diff --git a/tests/e2e/test_local_cases.py b/tests/e2e/test_local_cases.py new file mode 100644 index 0000000..1bf3afd --- 
class LocalCaseTests(unittest.TestCase):
    """Run every local-mode registry case and validate the registry itself."""

    # Show complete diffs when an expected and an actual state differ.
    maxDiff = None

    def test_registry_matches_fixture_directories(self) -> None:
        """Every fixture directory must be registered; directories are optional."""
        registered = set(load_e2e_cases())
        on_disk = {entry.name for entry in FIXTURES_DIR.iterdir() if entry.is_dir()}
        orphaned = on_disk - registered
        self.assertFalse(
            orphaned,
            f"fixture directories without a tests.yaml entry: {sorted(orphaned)}",
        )

    def test_registry_cases_are_runnable(self) -> None:
        """Every case declares a runner, known modes, and the files it needs."""
        known_modes = {"local", "live", "performance"}
        for case_name, case in load_e2e_cases().items():
            with self.subTest(case=case_name):
                self.assertTrue(case_runners(case), "case needs cliCommand or importConfig")
                self.assertTrue(
                    set(case_modes(case)) <= known_modes,
                    f"unknown mode in {case_modes(case)}",
                )

                # Each file the case relies on must exist in its fixture directory.
                case_directory = FIXTURES_DIR / case_name
                for file_name in sorted(required_case_files(case)):
                    required_path = case_directory / file_name
                    self.assertTrue(required_path.is_file(), f"case requires {required_path}")

                command_line = case.get("cliCommand", "")
                if "{user}" in command_line:
                    self.assertNotIn(
                        "local",
                        case_modes(case),
                        "{user} resolves to the live --user; local mode cannot use it",
                    )

                tokens = shlex.split(command_line)
                if tokens and tokens[0] == "restore":
                    self.assertNotIn(
                        "--apply",
                        tokens,
                        "registry cases must not run a bare restore --apply; restores "
                        "are managed by the seeded set-apply cycle",
                    )

    def test_local_replay_cases(self) -> None:
        """Replay-style cases assert parser exit codes and output substrings."""
        for case_name, case in load_e2e_cases().items():
            runs_locally = "local" in case_modes(case)
            if runs_locally and is_replay_case(case):
                with self.subTest(case=case_name):
                    # An empty string from the replay runner means "no failure".
                    self.assertEqual("", run_local_replay_case(case_name))

    def test_local_state_cases(self) -> None:
        """State cases run once per declared runner and must reach the expected state."""
        for case_name, case in load_e2e_cases().items():
            runs_locally = "local" in case_modes(case)
            if not runs_locally or is_replay_case(case):
                continue
            for runner in case_runners(case):
                with self.subTest(case=case_name, runner=runner):
                    outcome = run_fixture_case(case_name, runner)
                    # A command failure is only acceptable when the case expects errors.
                    if not outcome.expected_errors:
                        self.assertIsNone(outcome.command_failure)
                    self.assertEqual(outcome.expected_state, outcome.actual_state)
                    self.assertIsNone(outcome.failure)
+logging.getLogger().addHandler(logging.NullHandler()) diff --git a/tests/run.py b/tests/run.py new file mode 100644 index 0000000..8a3a51c --- /dev/null +++ b/tests/run.py @@ -0,0 +1,2872 @@ +#!/usr/bin/env python3 +"""Single entrypoint for all src-auth-perms-sync testing. + +Levels (each level runs only its own checks): + + --local Fast, no network. Lint, format, types, unit + fixture-case + tests, CLI argument rejection matrix, and randomized + permission-invariant checks. Suitable for a pre-commit hook. + --live End-to-end runs against the Sourcegraph test instance + configured in .env, with independent GraphQL read-back + verification of the actual permission state, and a + pip-install smoke test of the wheel. + --performance Repeated timed runs of the expensive paths against the test + instance, with Sourcegraph trace retention and resource + sampling, reported as a TSV and median summary. + +--live and --performance optionally take a comma-delimited list of test +names (substring match) to run a subset, e.g. --live full-overwrite-unions. + +Other commands: + + --update-golden Re-run every fixture case in tests/e2e/fixtures/ and + rewrite its after.json from the actual result. Review the + diff carefully before committing: after.json is the + assertion. 
+ +Examples: + + uv run tests/run.py + uv run tests/run.py --live + uv run tests/run.py --performance --repeat 3 + uv run tests/run.py --update-golden +""" + +from __future__ import annotations + +import argparse +import base64 +import datetime +import json +import logging +import os +import random +import re +import shlex +import statistics +import subprocess +import sys +import tempfile +import threading +import time +import urllib.error +import urllib.request +from collections.abc import Callable, Sequence +from concurrent.futures import ThreadPoolExecutor +from dataclasses import dataclass, field +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast +from urllib.parse import urlsplit + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +if TYPE_CHECKING: + from tests.e2e.case_runner import FixtureRunResult, FixtureState + +FIXTURES_DIR = ROOT / "tests" / "e2e" / "fixtures" +TEST_LOGS_DIR = ROOT / "logs" +LOG_PATH_PATTERN = re.compile(r"Writing log events to (.+?/log\.json)\.") +STRUCTURED_EVENT_LINE_PATTERN = re.compile(r"^[.]*event=\S+\s*$") +READ_BACK_PAGE_SIZE = 100 +FULL_APPLY_READ_BACK_USER_SAMPLE = 5 +DEFAULT_PROPERTY_ITERATIONS = 25 +DEFAULT_PROPERTY_SEED = 20260610 +DEFAULT_PERFORMANCE_REPEAT = 1 + +EXPLICIT_REPOS_READ_BACK_QUERY = """ +query TestExplicitRepoReadBack($username: String!, $first: Int!, $after: String) { + user(username: $username) { + id + permissionsInfo { + repositories(source: API, first: $first, after: $after) { + nodes { repository { name } } + pageInfo { hasNextPage endCursor } + } + } + } +} +""" + +REPOSITORY_USERS_READ_BACK_QUERY = """ +query TestRepositoryUsersReadBack($name: String!, $first: Int!, $after: String) { + repository(name: $name) { + id + permissionsInfo { + users(first: $first, after: $after) { + nodes { reasons user { username } } + pageInfo { hasNextPage endCursor } + } + } + } +} +""" + +EXPLICIT_API_PERMISSION_REASON = "Explicit API" 
SITE_ADMIN_PERMISSION_REASON = "Site Admin"

log = logging.getLogger("test")
command_output_log = logging.getLogger("test.command_output")


# ---------------------------------------------------------------------------
# Logging: everything goes to the console and to one log file per run
# ---------------------------------------------------------------------------


# During the randomized invariant checks, the package emits thousands of
# identical structured records; this flag drops them from BOTH handlers.
SUPPRESS_PACKAGE_LOGS = threading.Event()

# With --quiet, package chatter stays out of the console entirely — including
# the expected warnings produced by intentionally-failing cases. Runner
# failures are still shown (they log at ERROR), and the file keeps everything.
CONSOLE_QUIET = threading.Event()

# Logger-name prefixes that identify records emitted by the package under test.
_PACKAGE_LOGGER_PREFIXES = ("src_auth_perms_sync", "src_py_lib")


def is_structured_event(record: logging.LogRecord) -> bool:
    """Return True for src_py_lib structured span records.

    Such a record is recognised by a string message that starts with
    "event="; its payload lives in record attributes that a text formatter
    never renders, so the rendered line carries no information. CLI
    subprocess runs write the full JSON versions to their own log.json.
    """
    message = record.msg
    if not isinstance(message, str):
        return False
    return message.startswith("event=")


class PackageNoiseFilter(logging.Filter):
    """Handler filter that keeps package chatter out of the way.

    Structured events are always dropped. Records from loggers outside the
    package always pass. For package records:

    * console handler with --quiet: dropped entirely;
    * while SUPPRESS_PACKAGE_LOGS is set: only ERROR and above pass, on both
      the console and the log file;
    * console handler otherwise: only WARNING and above pass;
    * file handler otherwise: everything passes.
    """

    def __init__(self, for_console: bool) -> None:
        super().__init__()
        self.for_console = for_console

    def filter(self, record: logging.LogRecord) -> bool:
        """Decide whether `record` is emitted by the handler this filter guards."""
        if is_structured_event(record):
            return False

        from_package = record.name.startswith(_PACKAGE_LOGGER_PREFIXES)
        if not from_package:
            return True

        if self.for_console and CONSOLE_QUIET.is_set():
            return False
        if SUPPRESS_PACKAGE_LOGS.is_set():
            return record.levelno >= logging.ERROR
        return not self.for_console or record.levelno >= logging.WARNING


def configure_logging(log_file: Path, quiet: bool = False) -> None:
    """Attach a console handler and a file handler to the root logger.

    With `quiet`, the console only shows warnings, errors, and failed checks;
    the log file always gets everything. The log file's parent directory is
    created when missing.
    """
    log_file.parent.mkdir(parents=True, exist_ok=True)
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    # Console: bare messages, filtered for noise.
    to_console = logging.StreamHandler(sys.stdout)
    to_console.setFormatter(logging.Formatter("%(message)s"))
    to_console.addFilter(PackageNoiseFilter(for_console=True))
    if quiet:
        CONSOLE_QUIET.set()
        to_console.setLevel(logging.WARNING)
    root_logger.addHandler(to_console)

    # File: timestamped, with level and logger name.
    to_file = logging.FileHandler(log_file, encoding="utf-8")
    to_file.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s %(message)s"))
    to_file.addFilter(PackageNoiseFilter(for_console=False))
    root_logger.addHandler(to_file)
@dataclass(frozen=True)
class TestArguments:
    """Immutable bundle of the options parsed from this test run's command line."""

    # Selected level: "local", "live", or "performance".
    level: str
    # Name tokens selecting a subset of the level; empty = run everything.
    test_filter: tuple[str, ...]
    quiet: bool
    update_golden: bool
    env_file: Path
    user: str | None
    repeat: int
    seed: int
    property_iterations: int
    allow_non_test_endpoint: bool
    # Performance options.
    candidate_command: str
    baseline_command: str | None
    fail_on_memory_regression_percent: float | None
    fail_on_memory_regression_mib: float | None
    jaeger_trace_limit: int
    external_sample_interval: float
    # Sourcegraph load monitor options.
    monitor_sourcegraph_load: bool
    monitor_namespace: str
    monitor_frontend_target: str
    monitor_postgres_target: str
    monitor_psql_command: str
    monitor_interval_seconds: int
    monitor_postgres_interval_seconds: int
    monitor_statements_interval_seconds: int


def _add_level_options(argument_parser: argparse.ArgumentParser) -> None:
    """Register the mutually exclusive --local / --live / --performance selectors."""
    levels = argument_parser.add_mutually_exclusive_group()
    levels.add_argument(
        "--local", action="store_true", help="Fast checks with no network (default)"
    )
    # --live and --performance take an optional value: None when the flag is
    # absent, "" when it is given bare, otherwise the comma-delimited filter.
    levels.add_argument(
        "--live",
        nargs="?",
        const="",
        default=None,
        metavar="TESTS",
        help="Tests against the .env instance. Optionally pass a comma-delimited "
        "list of test names (substring match) to run only those, "
        "e.g. --live full-overwrite-unions or --live wheel,baseline",
    )
    levels.add_argument(
        "--performance",
        nargs="?",
        const="",
        default=None,
        metavar="TESTS",
        help="Repeated timed runs against the .env instance with traces and resource "
        "sampling. Optionally pass a comma-delimited list of test names (substring match)",
    )


def _add_general_options(argument_parser: argparse.ArgumentParser) -> None:
    """Register the options that are not tied to an argument group."""
    argument_parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Console shows only the log file path and any warnings, errors, or "
        "failed checks; the log file still gets everything",
    )
    argument_parser.add_argument(
        "--update-golden",
        action="store_true",
        help="Rewrite tests/e2e/fixtures/*/after.json from actual results, then exit",
    )
    argument_parser.add_argument(
        "--env-file",
        type=Path,
        default=ROOT / ".env",
        help="Env file providing SRC_ENDPOINT and SRC_ACCESS_TOKEN for live runs (default: .env)",
    )
    argument_parser.add_argument(
        "--user",
        default=None,
        help="Sourcegraph username for user-scoped live cases "
        "(default: $SRC_AUTH_PERMS_SYNC_TEST_USER or $USER)",
    )
    argument_parser.add_argument(
        "--repeat",
        type=int,
        default=DEFAULT_PERFORMANCE_REPEAT,
        help=f"Repetitions per performance case (default: {DEFAULT_PERFORMANCE_REPEAT})",
    )
    argument_parser.add_argument(
        "--seed",
        type=int,
        default=DEFAULT_PROPERTY_SEED,
        help=f"Random seed for invariant checks (default: {DEFAULT_PROPERTY_SEED})",
    )
    argument_parser.add_argument(
        "--property-iterations",
        type=int,
        default=DEFAULT_PROPERTY_ITERATIONS,
        help=f"Random worlds per invariant check (default: {DEFAULT_PROPERTY_ITERATIONS})",
    )
    argument_parser.add_argument(
        "--allow-non-test-endpoint",
        action="store_true",
        help="Allow live runs against endpoints that do not look like test instances",
    )


def _add_performance_options(argument_parser: argparse.ArgumentParser) -> None:
    """Register the "performance" argument group."""
    performance = argument_parser.add_argument_group("performance")
    performance.add_argument(
        "--candidate-command",
        default="uv run src-auth-perms-sync",
        help="Command used to invoke the CLI (default: uv run src-auth-perms-sync)",
    )
    performance.add_argument(
        "--baseline-command",
        default=None,
        help="Optional baseline CLI command; when set, performance cases run for both "
        "variants and medians are compared",
    )
    performance.add_argument(
        "--fail-on-memory-regression-percent",
        type=float,
        default=None,
        help="Fail if candidate median peak RSS regresses by more than this percent",
    )
    performance.add_argument(
        "--fail-on-memory-regression-mib",
        type=float,
        default=None,
        help="Fail if candidate median peak RSS regresses by more than this many MiB",
    )
    performance.add_argument(
        "--jaeger-trace-limit",
        type=int,
        default=10,
        help="Fetch up to this many slowest Sourcegraph Jaeger traces per performance case; "
        "0 disables trace fetching (default: 10)",
    )
    performance.add_argument(
        "--external-sample-interval",
        type=float,
        default=1.0,
        help="Seconds between external process-tree RSS samples during performance cases; "
        "0 disables (default: 1.0)",
    )


def _add_monitor_options(argument_parser: argparse.ArgumentParser) -> None:
    """Register the "sourcegraph load monitor" argument group."""
    monitor = argument_parser.add_argument_group("sourcegraph load monitor")
    monitor.add_argument(
        "--monitor-sourcegraph-load",
        action="store_true",
        help="Sample Sourcegraph pod and Postgres load via kubectl during performance cases",
    )
    text_defaults = (
        ("--monitor-namespace", "m"),
        ("--monitor-frontend-target", "deployment/sourcegraph-frontend"),
        ("--monitor-postgres-target", "pod/pgsql-0"),
        ("--monitor-psql-command", "psql -X -U sg -d sg"),
    )
    for flag, text_default in text_defaults:
        monitor.add_argument(flag, default=text_default)
    interval_defaults = (
        ("--monitor-interval-seconds", 5),
        ("--monitor-postgres-interval-seconds", 10),
        ("--monitor-statements-interval-seconds", 30),
    )
    for flag, seconds_default in interval_defaults:
        monitor.add_argument(flag, type=int, default=seconds_default)


def parse_arguments(argv: Sequence[str] | None = None) -> TestArguments:
    """Parse the command line into a `TestArguments` value.

    `argv` defaults to the process arguments (argparse's behaviour for None).
    The level is "local" unless --live or --performance was given; those two
    flags carry the optional comma-delimited test filter.
    """
    argument_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    _add_level_options(argument_parser)
    _add_general_options(argument_parser)
    _add_performance_options(argument_parser)
    _add_monitor_options(argument_parser)
    namespace = argument_parser.parse_args(argv)

    # The flags are mutually exclusive, so at most one of these is not None.
    if namespace.performance is not None:
        level, raw_filter = "performance", cast(str, namespace.performance)
    elif namespace.live is not None:
        level, raw_filter = "live", cast(str, namespace.live)
    else:
        level, raw_filter = "local", ""

    return TestArguments(
        level=level,
        test_filter=parse_test_filter(raw_filter),
        quiet=bool(namespace.quiet),
        update_golden=bool(namespace.update_golden),
        env_file=cast(Path, namespace.env_file),
        user=cast("str | None", namespace.user),
        repeat=int(namespace.repeat),
        seed=int(namespace.seed),
        property_iterations=int(namespace.property_iterations),
        allow_non_test_endpoint=bool(namespace.allow_non_test_endpoint),
        candidate_command=str(namespace.candidate_command),
        baseline_command=cast("str | None", namespace.baseline_command),
        fail_on_memory_regression_percent=cast(
            "float | None", namespace.fail_on_memory_regression_percent
        ),
        fail_on_memory_regression_mib=cast(
            "float | None", namespace.fail_on_memory_regression_mib
        ),
        jaeger_trace_limit=int(namespace.jaeger_trace_limit),
        external_sample_interval=float(namespace.external_sample_interval),
        monitor_sourcegraph_load=bool(namespace.monitor_sourcegraph_load),
        monitor_namespace=str(namespace.monitor_namespace),
        monitor_frontend_target=str(namespace.monitor_frontend_target),
        monitor_postgres_target=str(namespace.monitor_postgres_target),
        monitor_psql_command=str(namespace.monitor_psql_command),
        monitor_interval_seconds=int(namespace.monitor_interval_seconds),
        monitor_postgres_interval_seconds=int(namespace.monitor_postgres_interval_seconds),
        monitor_statements_interval_seconds=int(namespace.monitor_statements_interval_seconds),
    )


def parse_test_filter(value: str) -> tuple[str, ...]:
    """Split a comma-delimited filter into stripped, non-empty tokens."""
    stripped_tokens = (piece.strip() for piece in value.split(","))
    return tuple(token for token in stripped_tokens if token)


def with_suffix_name(prefix: Path, suffix: str) -> Path:
    """Return the prefix path with a suffix appended to its file name."""
    extended_name = f"{prefix.name}{suffix}"
    return prefix.with_name(extended_name)
"""Parse KEY=VALUE lines from an env file, ignoring comments and blanks.""" + values: dict[str, str] = {} + if not path.is_file(): + return values + for raw_line in path.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + if line.startswith("export "): + line = line[len("export ") :] + key, _, value = line.partition("=") + value = value.strip() + if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}: + value = value[1:-1] + values[key.strip()] = value + return values + + +def resolve_secret_reference(value: str) -> str: + """Resolve 1Password op:// references so the read-back client gets a real token.""" + if not value.startswith("op://"): + return value + completed = subprocess.run( + ["op", "read", value], + capture_output=True, + text=True, + check=False, + ) + if completed.returncode != 0: + raise SystemExit(f"Failed to resolve {value!r} via `op read`: {completed.stderr.strip()}") + return completed.stdout.strip() + + +def assert_test_endpoint(endpoint: str, allow_non_test_endpoint: bool) -> None: + """Refuse mutating live runs against endpoints that do not look like test instances.""" + if allow_non_test_endpoint: + return + hostname = (urlsplit(endpoint).hostname or "").lower() + if hostname in {"localhost", "127.0.0.1", "::1"}: + return + if hostname.endswith(".sgdev.org") or ".sgdev." in hostname: + return + raise SystemExit( + f"Refusing live tests against non-test-looking endpoint {endpoint!r}. " + "Pass --allow-non-test-endpoint if this is intentional." 
# ---------------------------------------------------------------------------
# Check bookkeeping
# ---------------------------------------------------------------------------


@dataclass
class CheckResult:
    """Outcome of one named check at one level."""

    name: str
    level: str
    passed: bool
    seconds: float
    detail: str = ""


@dataclass(frozen=True)
class CliCase:
    """One real CLI invocation and the conditions it must satisfy."""

    name: str
    arguments: tuple[str, ...]
    expected_exit_code: int = 0
    # Every one of these substrings must appear in the output.
    must_contain: tuple[str, ...] = ()
    # When non-empty, at least one of these substrings must appear.
    must_contain_one_of: tuple[str, ...] = ()


@dataclass
class CliResult:
    """What one CLI case produced, plus the check of its expectations."""

    case: CliCase
    return_code: int
    output: str
    elapsed_seconds: float
    log_path: Path | None
    run_directory: Path | None
    external_peak_rss_mb: float | None = None
    external_sample_count: int = 0

    def assertion_failure(self) -> str | None:
        """Return the first unmet expectation as text, or None when all hold.

        Checked in order: exit code, each required substring, then the
        "one of" substrings.
        """
        wanted_exit = self.case.expected_exit_code
        if self.return_code != wanted_exit:
            return f"expected exit {wanted_exit}, got {self.return_code}"

        first_missing = next(
            (needle for needle in self.case.must_contain if needle not in self.output),
            None,
        )
        if first_missing is not None:
            return f"output did not contain {first_missing!r}"

        alternatives = self.case.must_contain_one_of
        if alternatives and all(needle not in self.output for needle in alternatives):
            expected = ", ".join(map(repr, alternatives))
            return f"output did not contain any of: {expected}"
        return None


class LiveAbort(RuntimeError):
    """Raised when a live prerequisite fails and dependent checks must be skipped."""


@dataclass(frozen=True)
class CommandExecution:
    """Captured result of one streamed subprocess."""

    return_code: int
    output: str
    external_peak_rss_mb: float | None = None
    external_sample_count: int = 0
def process_tree_rss_mb(root_process_identifier: int) -> float | None:
    """Return current RSS for the process and its descendants, in MiB.

    The process table is read with `ps -axo pid=,ppid=,rss=` and the rss
    column is summed as KiB over the tree rooted at `root_process_identifier`.

    Returns None when `ps` cannot be run, times out, exits non-zero, or does
    not list the root process.
    """
    try:
        process_result = subprocess.run(
            ["ps", "-axo", "pid=,ppid=,rss="],
            capture_output=True,
            text=True,
            timeout=2,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        return None
    if process_result.returncode != 0:
        return None

    parent_by_process: dict[int, int] = {}
    resident_kib_by_process: dict[int, int] = {}
    for raw_line in process_result.stdout.splitlines():
        fields = raw_line.split()
        if len(fields) != 3:
            continue
        try:
            process_identifier = int(fields[0])
            parent_process_identifier = int(fields[1])
            resident_kib = int(fields[2])
        except ValueError:
            continue
        parent_by_process[process_identifier] = parent_process_identifier
        resident_kib_by_process[process_identifier] = resident_kib
    if root_process_identifier not in resident_kib_by_process:
        return None

    children_by_parent: dict[int, list[int]] = {}
    for process_identifier, parent_process_identifier in parent_by_process.items():
        children_by_parent.setdefault(parent_process_identifier, []).append(process_identifier)

    # Walk the tree from the root; `seen` guards against visiting a process twice.
    total_kib = 0
    pending = [root_process_identifier]
    seen: set[int] = set()
    while pending:
        process_identifier = pending.pop()
        if process_identifier in seen:
            continue
        seen.add(process_identifier)
        total_kib += resident_kib_by_process.get(process_identifier, 0)
        pending.extend(children_by_parent.get(process_identifier, []))
    return total_kib / 1024.0


class ExternalProcessSampler:
    """Sample RSS for a child process tree from outside the child process.

    A background thread samples every `interval_seconds`; `start` and `stop`
    each take one extra sample on the calling thread. A non-positive interval
    disables sampling entirely. `peak_rss_mb` holds the largest sample seen
    and `sample_count` the number of successful samples.
    """

    def __init__(self, root_process_identifier: int, interval_seconds: float) -> None:
        self.root_process_identifier = root_process_identifier
        self.interval_seconds = interval_seconds
        self.peak_rss_mb: float | None = None
        self.sample_count = 0
        self._stop = threading.Event()
        self._thread: threading.Thread | None = None
        # sample_once runs on both the worker thread and the caller's thread;
        # the lock keeps the count/peak update from interleaving.
        self._lock = threading.Lock()

    def start(self) -> None:
        """Start the background sampler and take an immediate first sample."""
        if self.interval_seconds <= 0:
            return
        self._thread = threading.Thread(
            target=self._loop, name="ExternalProcessSampler", daemon=True
        )
        self._thread.start()
        self.sample_once()

    def stop(self) -> None:
        """Take a final sample, then signal the worker and wait briefly for it."""
        if self.interval_seconds <= 0:
            return
        self.sample_once()
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=2.0)

    def _loop(self) -> None:
        # Event.wait returns False on timeout, i.e. once per interval until stopped.
        while not self._stop.wait(self.interval_seconds):
            self.sample_once()

    def sample_once(self) -> None:
        """Record one sample; a failed measurement is ignored."""
        rss_mb = process_tree_rss_mb(self.root_process_identifier)
        if rss_mb is None:
            return
        with self._lock:
            self.sample_count += 1
            if self.peak_rss_mb is None or rss_mb > self.peak_rss_mb:
                self.peak_rss_mb = rss_mb
@dataclass(frozen=True)
class RunLogSummary:
    """Resource usage and the run end record from one CLI run's structured log."""

    # The last record with event == "run" and phase == "end", if any.
    run_record: dict[str, Any] | None
    # Maxima over all "resource_sample" records; None when never reported.
    sampled_peak_rss_mb: float | None
    resource_sample_count: int
    max_num_fds: int | None
    max_num_threads: int | None
    max_process_cpu_percent: float | None


def float_field(record: dict[str, Any], *names: str) -> float | None:
    """Return the first of `names` holding a real number, as a float.

    Booleans are not treated as numbers. Returns None when no name matches.
    """
    for name in names:
        value = record.get(name)
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return float(value)
    return None


def int_field(record: dict[str, Any], name: str) -> int | None:
    """Return `record[name]` when it is an int (and not a bool), else None."""
    value = record.get(name)
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    return None


def _larger(current: Any, candidate: Any) -> Any:
    """Return the larger of two optional numbers, treating None as "no value"."""
    if candidate is None:
        return current
    if current is None or candidate > current:
        return candidate
    return current


def read_run_log_summary(log_path: Path | None) -> RunLogSummary:
    """Parse a CLI run's log.json for the run end record and resource samples.

    The log is read as one JSON document per line. Blank lines, lines that are
    not valid JSON, and JSON values that are not objects are skipped. A missing
    path or file yields an all-empty summary.
    """
    if log_path is None or not log_path.is_file():
        return RunLogSummary(None, None, 0, None, None, None)

    run_record: dict[str, Any] | None = None
    sampled_peak_rss_mb: float | None = None
    resource_sample_count = 0
    max_num_fds: int | None = None
    max_num_threads: int | None = None
    max_process_cpu_percent: float | None = None

    with log_path.open(encoding="utf-8") as log_file:
        for line in log_file:
            if not line.strip():
                continue
            try:
                parsed = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(parsed, dict):
                # Valid JSON that is not an object (a list, string, number...)
                # has no fields to read; skip it instead of failing on .get.
                continue
            record = cast("dict[str, Any]", parsed)

            if record.get("event") == "resource_sample":
                resource_sample_count += 1
                sampled_peak_rss_mb = _larger(
                    sampled_peak_rss_mb,
                    float_field(record, "peak_rss_mb", "rss_mb", "process_rss_mb"),
                )
                max_num_fds = _larger(max_num_fds, int_field(record, "num_fds"))
                max_num_threads = _larger(max_num_threads, int_field(record, "num_threads"))
                max_process_cpu_percent = _larger(
                    max_process_cpu_percent,
                    float_field(record, "process_cpu_percent", "cpu_percent"),
                )
            if record.get("event") == "run" and record.get("phase") == "end":
                run_record = record

    return RunLogSummary(
        run_record=run_record,
        sampled_peak_rss_mb=sampled_peak_rss_mb,
        resource_sample_count=resource_sample_count,
        max_num_fds=max_num_fds,
        max_num_threads=max_num_threads,
        max_process_cpu_percent=max_process_cpu_percent,
    )
def stream_command(
    self,
    command: Sequence[str],
    environment: dict[str, str] | None = None,
    external_sample_interval: float = 0.0,
) -> CommandExecution:
    """Run a command, mirroring its output to the console and log file.

    When `external_sample_interval` is positive, the child's process-tree
    RSS is sampled from outside while it runs.

    The child's stderr is merged into stdout. If streaming fails part-way
    (for example the output cannot be decoded, or the run is interrupted),
    the child is killed, its pipe is closed, and the sampler is stopped
    before the exception propagates.

    Returns:
        The exit code, the full captured output, and the sampler's peak
        RSS and sample count.
    """
    command_output_log.info("$ %s", shlex.join(command))
    output_lines: list[str] = []
    # The context manager closes the stdout pipe and reaps the child on
    # every exit path.
    with subprocess.Popen(
        list(command),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        env=environment,
        cwd=str(ROOT),
    ) as process:
        sampler = ExternalProcessSampler(process.pid, external_sample_interval)
        sampler.start()
        try:
            assert process.stdout is not None
            for line in process.stdout:
                output_lines.append(line)
                # Bare structured-event records leaking from in-process test runs
                # carry no information once rendered; keep them in the captured
                # output for assertions, but not in our logs.
                if not STRUCTURED_EVENT_LINE_PATTERN.match(line):
                    command_output_log.info("%s", line.rstrip("\n"))
            return_code = process.wait()
        except BaseException:
            # Do not leave a running child behind; Popen.__exit__ would
            # otherwise wait for it to finish on its own.
            process.kill()
            raise
        finally:
            # Stop after wait() so the sampler's last sample comes after exit.
            sampler.stop()
    return CommandExecution(
        return_code=return_code,
        output="".join(output_lines),
        external_peak_rss_mb=sampler.peak_rss_mb,
        external_sample_count=sampler.sample_count,
    )
run_fixture_case, + run_local_replay_case, + ) + + log.info("\n=== Local: tests.yaml cases ===") + for case_name, case in load_e2e_cases().items(): + if "local" not in case_modes(case): + continue + if is_replay_case(case): + if update_golden: + continue + log.info("— %s (parse) —", case_name) + started = time.monotonic() + failure = run_local_replay_case(case_name) + self.record( + f"fixture: {case_name} (parse)", + "local", + not failure, + time.monotonic() - started, + failure, + ) + continue + runners = case_runners(case) + if update_golden: + result = run_fixture_case(case_name, runners[0]) + self._update_golden_after(FIXTURES_DIR / case_name, result) + continue + for runner in runners: + log.info("— %s (%s) —", case_name, runner) + started = time.monotonic() + result = run_fixture_case(case_name, runner) + self.record( + f"fixture: {case_name} ({runner})", + "local", + result.passed, + time.monotonic() - started, + result.failure or "", + ) + + def _update_golden_after(self, case_directory: Path, result: FixtureRunResult) -> None: + from tests.e2e.case_runner import FakeSourcegraphClient, load_state + + if result.expected_errors: + log.info("golden: %s expects errors; no after.json needed", case_directory.name) + return + if result.command_failure is not None: + log.error( + "golden: %s command FAILED (%s); not writing after.json", + case_directory.name, + result.command_failure, + ) + self.record(f"golden: {case_directory.name}", "local", False, 0.0) + return + before_state = FakeSourcegraphClient( + load_state(case_directory / "before.json") + ).export_state() + after_path = case_directory / "after.json" + if result.actual_state == before_state and not after_path.is_file(): + log.info("golden: %s is a no-op case; after.json stays omitted", case_directory.name) + return + if after_path.is_file(): + existing_state = FakeSourcegraphClient(load_state(after_path)).export_state() + if existing_state == result.actual_state: + log.info("golden: %s after.json 
def cli_environment(self, endpoint: str, token: str) -> dict[str, str]:
    """Build the environment for a CLI subprocess.

    Starts from a copy of the current process environment, drops every
    variable whose name begins with `SRC_AUTH_PERMS_SYNC_`, and sets
    `SRC_ENDPOINT` and `SRC_ACCESS_TOKEN` to the given values. The process
    environment itself is not modified.
    """
    inherited = dict(os.environ)
    for variable in list(inherited):
        if variable.startswith("SRC_AUTH_PERMS_SYNC_"):
            del inherited[variable]
    inherited.update(SRC_ENDPOINT=endpoint, SRC_ACCESS_TOKEN=token)
    return inherited
def graphql(self, query: str, variables: dict[str, object]) -> dict[str, Any]:
    """Independent GraphQL read path: stdlib urllib only, no package code.

    POSTs the query to `{self.endpoint}/.api/graphql` with the suite's
    access token and returns the response's `data` object.

    Raises:
        RuntimeError: when the response is not JSON, is not a JSON object,
            carries GraphQL `errors`, or has no `data` object.
        urllib.error.URLError: on transport or HTTP failures from urlopen.
    """
    payload = json.dumps({"query": query, "variables": variables}).encode("utf-8")
    request = urllib.request.Request(
        f"{self.endpoint}/.api/graphql",
        data=payload,
        headers={
            "Authorization": f"token {self.access_token}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=60) as response:
        try:
            parsed = json.load(response)
        except json.JSONDecodeError as error:
            # Callers such as check_read_back catch RuntimeError, not ValueError.
            raise RuntimeError(f"GraphQL response is not valid JSON: {error}") from error
    if not isinstance(parsed, dict):
        raise RuntimeError(
            f"GraphQL response is not a JSON object: {type(parsed).__name__}"
        )
    body = cast("dict[str, Any]", parsed)
    errors = body.get("errors")
    if errors:
        raise RuntimeError(f"GraphQL errors: {errors}")
    data = body.get("data")
    if not isinstance(data, dict):
        # Without this check a missing or null `data` would surface later as
        # KeyError/AttributeError in callers that expect a mapping.
        raise RuntimeError("GraphQL response has no data object")
    return cast("dict[str, Any]", data)
READ_BACK_PAGE_SIZE, "after": after_cursor}, + ) + user = cast("dict[str, Any] | None", data.get("user")) + if user is None: + return None + permissions_info = cast("dict[str, Any] | None", user.get("permissionsInfo")) + if permissions_info is None: + return names + connection = cast("dict[str, Any]", permissions_info["repositories"]) + for node in cast("list[dict[str, Any]]", connection["nodes"]): + repository = cast("dict[str, Any] | None", node.get("repository")) + if repository is not None: + names.add(cast(str, repository["name"])) + page_info = cast("dict[str, Any]", connection["pageInfo"]) + if not page_info.get("hasNextPage"): + return names + after_cursor = cast("str | None", page_info.get("endCursor")) + + def read_back_repository_explicit_users( + self, repository_name: str + ) -> tuple[int, set[str]] | None: + """Return (database id, explicit-API usernames) for one repo, or None if missing. + + Repo-centric `permissionsInfo.users` has no source filter, so usernames + are taken from the "Explicit API" reason. Site admins are reported with + only a "Site Admin" reason even when they also hold an explicit grant, + so those users are disambiguated with a user-centric source:API query. 
+ """ + repository_id: int | None = None + explicit_usernames: set[str] = set() + ambiguous_usernames: set[str] = set() + after_cursor: str | None = None + while True: + data = self.graphql( + REPOSITORY_USERS_READ_BACK_QUERY, + {"name": repository_name, "first": READ_BACK_PAGE_SIZE, "after": after_cursor}, + ) + repository = cast("dict[str, Any] | None", data.get("repository")) + if repository is None: + return None + repository_id = decode_repository_node_id(cast(str, repository["id"])) + permissions_info = cast("dict[str, Any] | None", repository.get("permissionsInfo")) + if permissions_info is None: + return (repository_id, explicit_usernames) + connection = cast("dict[str, Any]", permissions_info["users"]) + for node in cast("list[dict[str, Any]]", connection["nodes"]): + user = cast("dict[str, Any] | None", node.get("user")) + if user is None: + continue + username = cast(str, user["username"]) + reasons = cast("list[str]", node.get("reasons", [])) + if EXPLICIT_API_PERMISSION_REASON in reasons: + explicit_usernames.add(username) + elif SITE_ADMIN_PERMISSION_REASON in reasons: + ambiguous_usernames.add(username) + page_info = cast("dict[str, Any]", connection["pageInfo"]) + if not page_info.get("hasNextPage"): + break + after_cursor = cast("str | None", page_info.get("endCursor")) + for username in sorted(ambiguous_usernames): + user_repository_names = self.read_back_explicit_repo_names(username) + if user_repository_names and repository_name in user_repository_names: + explicit_usernames.add(username) + assert repository_id is not None + return (repository_id, explicit_usernames) + + def check_read_back(self, name: str, username: str, expected_names: set[str]) -> None: + started = time.monotonic() + try: + actual_names = self.read_back_explicit_repo_names(username) + except (urllib.error.URLError, RuntimeError, OSError) as error: + self.record(name, "live", False, time.monotonic() - started, str(error)) + return + if actual_names is None: + self.record( + 
def prepare_live(self) -> dict[str, str]:
    """Resolve live credentials and the test user, then return the CLI environment.

    `SRC_ENDPOINT` and `SRC_ACCESS_TOKEN` are taken from the env file first
    and the process environment second. The endpoint (trailing slash
    removed), the resolved token, and the test user are stored on the suite.
    The test user comes from `--user`, then `SRC_AUTH_PERMS_SYNC_TEST_USER`,
    then `USER`.

    Raises:
        LiveAbort: when the endpoint or token is missing, no test user can
            be determined, the instance cannot be queried, or the test user
            does not exist there.
    """
    env_values = read_env_file(self.arguments.env_file)
    endpoint = env_values.get("SRC_ENDPOINT") or os.environ.get("SRC_ENDPOINT") or ""
    token = env_values.get("SRC_ACCESS_TOKEN") or os.environ.get("SRC_ACCESS_TOKEN") or ""
    if not endpoint or not token:
        raise LiveAbort(
            f"SRC_ENDPOINT and SRC_ACCESS_TOKEN are required for live runs; "
            f"set them in {self.arguments.env_file} or the environment"
        )
    self.endpoint = endpoint.rstrip("/")
    self.access_token = resolve_secret_reference(token)
    assert_test_endpoint(self.endpoint, self.arguments.allow_non_test_endpoint)
    self.test_user = (
        self.arguments.user
        or os.environ.get("SRC_AUTH_PERMS_SYNC_TEST_USER")
        or os.environ.get("USER")
        or ""
    )
    if not self.test_user:
        raise LiveAbort("--user is required when SRC_AUTH_PERMS_SYNC_TEST_USER and USER unset")
    try:
        user_repos = self.read_back_explicit_repo_names(self.test_user)
    except (urllib.error.URLError, RuntimeError, OSError) as error:
        # run_live and run_performance only catch LiveAbort/SystemExit, so a
        # network or GraphQL failure here must become a LiveAbort to be
        # recorded as a failed prerequisite instead of crashing the suite.
        raise LiveAbort(
            f"cannot query {self.endpoint} for user {self.test_user!r}: {error}"
        ) from error
    if user_repos is None:
        raise LiveAbort(f"user {self.test_user!r} does not exist on {self.endpoint}")
    log.info(
        "Live instance: %s user: %s (%d explicit repo grant(s) currently)",
        self.endpoint,
        self.test_user,
        len(user_repos),
    )
    return self.cli_environment(self.endpoint, self.access_token)
def run_wheel_install_smoke(self) -> None:
    """Build the wheel, install it into a fresh venv, and run the CLI's --help.

    Everything happens inside a temporary directory. One
    "wheel install smoke" result is recorded: a failure naming the first
    command that exited non-zero, a failure when no wheel was produced, or
    a pass when every step succeeded.
    """
    log.info("\n--- Live: wheel build + pip install smoke ---")
    began = time.monotonic()

    def conclude(passed: bool, detail: str = "") -> None:
        self.record("wheel install smoke", "live", passed, time.monotonic() - began, detail)

    def first_failure(commands: list[list[str]]) -> str | None:
        # Run commands in order and stop at the first non-zero exit.
        for argv in commands:
            outcome = self.stream_command(argv)
            if outcome.return_code != 0:
                return f"{argv[0]} exit {outcome.return_code}"
        return None

    with tempfile.TemporaryDirectory(prefix="src-auth-perms-sync-wheel-") as scratch:
        scratch_root = Path(scratch)
        wheel_output = scratch_root / "dist"
        environment_root = scratch_root / "venv"

        failure = first_failure(
            [
                [
                    "uv",
                    "build",
                    "--wheel",
                    "--out-dir",
                    str(wheel_output),
                    "--no-create-gitignore",
                ],
                [sys.executable, "-m", "venv", str(environment_root)],
            ]
        )
        if failure is not None:
            conclude(False, failure)
            return

        built_wheels = sorted(wheel_output.glob("*.whl"))
        if not built_wheels:
            conclude(False, "no wheel produced")
            return

        scripts_directory = environment_root / "bin"
        failure = first_failure(
            [
                [
                    str(scripts_directory / "python"),
                    "-m",
                    "pip",
                    "install",
                    "--quiet",
                    str(built_wheels[0]),
                ],
                [str(scripts_directory / "src-auth-perms-sync"), "--help"],
            ]
        )
        if failure is not None:
            conclude(False, failure)
            return
        conclude(True)
def fixture_cases_for_mode(self, mode: str) -> list[tuple[str, dict[str, Any]]]:
    """List the (name, case) pairs from `load_e2e_cases()` whose modes include `mode`."""
    from tests.e2e.case_runner import case_modes, load_e2e_cases

    selected: list[tuple[str, dict[str, Any]]] = []
    for case_name, case in load_e2e_cases().items():
        if mode in case_modes(case):
            selected.append((case_name, cast("dict[str, Any]", case)))
    return selected
+ """ + from tests.e2e.case_runner import case_cli_arguments, expected_exit_code + + label = f"{level} fixture: {case_name}" + if "cliCommand" not in case: + self.record(label, level, False, 0.0, f"{level} mode requires a cliCommand") + return + typed_case = cast("Any", case) + arguments = tuple( + token.replace("{user}", self.test_user) + for token in case_cli_arguments(typed_case, case_name) + ) + if arguments[:1] == ("restore",) and "--apply" in arguments: + self.record( + label, + level, + False, + 0.0, + "registry cases must not run a bare restore --apply", + ) + return + if arguments[:1] == ("set",) and "--apply" in arguments: + self.run_seeded_fixture_apply(case_name, case, environment, level, run_main_case) + return + expected_errors = tuple(cast("list[str]", case.get("expectedErrors", []))) + expected_output = tuple(cast("list[str]", case.get("expectedOutput", []))) + replay_case = CliCase( + label, + arguments, + expected_exit_code(typed_case), + expected_errors + expected_output, + ) + if run_main_case is not None: + run_main_case(replay_case) + else: + self.run_cli_case(replay_case, environment, level=level) + + def run_seeded_fixture_apply( + self, + case_name: str, + case: dict[str, Any], + environment: dict[str, str], + level: str, + run_main_case: Callable[[CliCase], CliResult] | None = None, + ) -> None: + """Seed the case's before-state, run it with --apply, verify, restore. + + Every involved repo — fixture state repos, exact rule names, and any + declared `live.involvedRepos` — is captured, seeded, verified, and + restored. Involved repos absent from after.json are canaries: they + are seeded to their before-state (empty when undeclared) and must + read back unchanged, which catches selectors matching wider than the + case intends. 
+ """ + from tests.e2e.case_runner import case_cli_arguments + + label = f"{level} fixture: {case_name}" + expected_errors = tuple(cast("list[str]", case.get("expectedErrors", []))) + expected_mutations = cast("int | None", case.get("expectedMutations")) + live_settings = cast("dict[str, Any]", case.get("live") or {}) + declared_repository_names = cast("list[str]", live_settings.get("involvedRepos") or []) + + before_grants = fixture_grants(case_name, "before.json") + if before_grants is None: + self.record(label, level, False, 0.0, "missing before.json") + return + after_grants = fixture_grants(case_name, "after.json") or before_grants + rule_repository_names, selector_error = fixture_maps_repo_scope( + case_name, has_declared_repository_names=bool(declared_repository_names) + ) + if selector_error: + self.record(label, level, False, 0.0, selector_error) + return + + involved_names = sorted( + set(before_grants) + | set(after_grants) + | rule_repository_names + | set(declared_repository_names) + ) + original_state: dict[str, tuple[int, set[str]]] = {} + for repository_name in involved_names: + read_back = self.read_back_repository_explicit_users(repository_name) + if read_back is None: + self.record( + label, + level, + False, + 0.0, + f"repo {repository_name!r} does not exist on {self.endpoint}; live " + "cases must use real instance repo/user names in their fixture files", + ) + return + original_state[repository_name] = read_back + repository_ids = {name: state[0] for name, state in original_state.items()} + + # Preflight: some modes (e.g. --users-without-explicit-perms) are only + # deterministic when the named users hold no grants beyond the + # involved repos. Assert that instance-wide before mutating anything. 
+ for username in cast("list[str]", live_settings.get("usersWithoutOtherGrants") or []): + grant_names = self.read_back_explicit_repo_names(username) + if grant_names is None: + self.record(label, level, False, 0.0, f"user {username!r} not found") + return + outside_grants = sorted(grant_names - set(involved_names)) + if outside_grants: + self.record( + label, + level, + False, + 0.0, + f"precondition not met: {username} holds explicit grants outside " + f"the involved repos: {outside_grants[:5]}", + ) + return + + # Repos in scope but absent from after.json must come back exactly as + # seeded — these are the canaries that detect widened selectors. + expected_after = { + name: after_grants.get(name, before_grants.get(name, set())) for name in involved_names + } + + with tempfile.TemporaryDirectory(prefix=f"src-auth-perms-sync-live-{case_name}-") as tmp: + seed_path = Path(tmp) / "seed-before.json" + cleanup_path = Path(tmp) / "cleanup.json" + write_state_snapshot( + seed_path, + self.endpoint, + { + name: (repository_ids[name], sorted(before_grants.get(name, set()))) + for name in involved_names + }, + ) + write_state_snapshot( + cleanup_path, + self.endpoint, + { + name: (repository_ids[name], sorted(original_state[name][1])) + for name in involved_names + }, + ) + try: + self.run_cli_case( + CliCase( + f"{label} [seed before-state]", + restore_arguments(seed_path), + 0, + must_contain_one_of=RESTORE_SUCCESS_MARKERS, + ), + environment, + level=level, + ) + self.check_repository_states( + f"{label} [seed verified]", + level, + {name: before_grants.get(name, set()) for name in involved_names}, + ) + + main_case = CliCase( + label, + tuple(case_cli_arguments(cast("Any", case), case_name)), + 1 if expected_errors else 0, + expected_errors, + ) + if run_main_case is not None: + result = run_main_case(main_case) + else: + result = self.run_cli_case(main_case, environment, level=level) + if expected_mutations is not None: + actual_mutations = 
def check_repository_states(
    self, name: str, level: str, expected_grants: dict[str, set[str]]
) -> None:
    """Read each repo back and record whether its explicit users match.

    Repos are visited in sorted name order. One result is recorded under
    `name`: it passes when every repo exists and its explicit usernames
    equal the expected set. On failure the detail lists each problem repo,
    showing at most five missing and five unexpected usernames.
    """
    began = time.monotonic()

    def problem_with(repository_name: str) -> str | None:
        observed = self.read_back_repository_explicit_users(repository_name)
        if observed is None:
            return f"{repository_name}: repo not found"
        actual = observed[1]
        wanted = expected_grants[repository_name]
        if actual == wanted:
            return None
        missing = sorted(wanted - actual)[:5]
        unexpected = sorted(actual - wanted)[:5]
        return f"{repository_name}: missing={missing} unexpected={unexpected}"

    problems = [
        problem
        for problem in map(problem_with, sorted(expected_grants))
        if problem is not None
    ]
    if problems:
        detail = "; ".join(problems)
    else:
        detail = f"{len(expected_grants)} repo(s) match"
    self.record(name, level, not problems, time.monotonic() - began, detail)
+ want_user_cycle = self.test_selected("live: set --users apply", "user cycle") + want_full_cycle = self.test_selected("live: set --full", "full cycle") + want_baseline = ( + self.test_selected("live: get user baseline", "baseline") + or want_user_cycle + or want_full_cycle + ) + if not want_baseline: + return + log.info("\n--- Live: permission cycles with independent read-back ---") + baseline = self.run_cli_case( + CliCase( + "live: get user baseline", + ("get", "--users", self.test_user), + 0, + ("Wrote before-snapshot",), + ), + environment, + level="live", + ) + baseline_names = self.user_scoped_snapshot_repo_names(baseline, self.test_user) + if baseline_names is None: + self.record("live: baseline artifact", "live", False, 0.0, "missing before.json") + return + self.check_read_back("live: baseline read-back", self.test_user, baseline_names) + if want_user_cycle: + self.run_user_scoped_cycle(environment, baseline_names) + if want_full_cycle: + self.run_full_cycle(environment, baseline_names) + + def run_user_scoped_cycle(self, environment: dict[str, str], baseline: set[str]) -> None: + apply_result = self.run_cli_case( + CliCase( + "live: set --users apply", + ("set", "--users", self.test_user, "--apply"), + 0, + must_contain_one_of=( + "VALIDATION OK", + "All selected users already have the mapped explicit grants", + ), + ), + environment, + level="live", + ) + try: + expected = self.user_scoped_snapshot_repo_names(apply_result, self.test_user) + if expected is None: + self.record("live: set --users read-back", "live", False, 0.0, "missing after.json") + else: + self.check_read_back("live: set --users read-back", self.test_user, expected) + finally: + if apply_result.run_directory is not None: + snapshot_path = apply_result.run_directory / "before.json" + # Dry run first: it must plan without mutating. The apply + # restore plus the baseline read-back below prove that. 
+ self.run_cli_case( + CliCase( + "live: restore user scope dry-run", + ("restore", "--restore-path", str(snapshot_path)), + 0, + must_contain_one_of=( + "Dry run complete", + "Scoped restore target already matches current state", + ), + ), + environment, + level="live", + ) + self.run_cli_case( + CliCase( + "live: restore user scope", + ("restore", "--restore-path", str(snapshot_path), "--apply"), + 0, + must_contain_one_of=( + "VALIDATION OK", + "Scoped restore target already matches current state", + ), + ), + environment, + level="live", + ) + self.check_read_back("live: post-restore equals baseline", self.test_user, baseline) + + def run_full_cycle(self, environment: dict[str, str], baseline: set[str]) -> None: + dry_run = self.run_cli_case( + CliCase( + "live: set --full dry-run", + ("set", "--full"), + 0, + ("Dry run complete",), + ), + environment, + level="live", + ) + if dry_run.run_directory is None: + self.record("live: full cycle", "live", False, 0.0, "dry run produced no artifacts") + return + baseline_snapshot = dry_run.run_directory / "before.json" + projected_after = dry_run.run_directory / "after.json" + + self.run_cli_case( + CliCase( + "live: set --full apply", + ("set", "--full", "--apply", "--no-backup"), + 0, + must_contain_one_of=("VALIDATION OK", "Apply done"), + ), + environment, + level="live", + ) + try: + self.check_full_apply_read_back(projected_after) + finally: + # Dry run first: it must plan without mutating. The apply + # restore plus the baseline read-back below prove that. 
def check_full_apply_read_back(self, projected_after: Path) -> None:
    """Read back a sample of users against the projected after-snapshot.

    Records a failure and stops when `projected_after` is not a file.
    Otherwise the snapshot's `repos` entries are inverted into repo names
    per user. The test user is always checked, followed by the other users
    with the most repos, up to `FULL_APPLY_READ_BACK_USER_SAMPLE` users in
    total. Each sampled user is verified with `check_read_back`.
    """
    if not projected_after.is_file():
        self.record(
            "live: full apply read-back", "live", False, 0.0, f"missing {projected_after}"
        )
        return

    document = cast("dict[str, Any]", json.loads(projected_after.read_text(encoding="utf-8")))
    grants_by_user: dict[str, set[str]] = {}
    for entry in cast("dict[str, dict[str, Any]]", document.get("repos", {})).values():
        for member in cast("list[str]", entry.get("users", [])):
            if member not in grants_by_user:
                grants_by_user[member] = set()
            grants_by_user[member].add(cast(str, entry["name"]))

    # Users with the most repos first; the sort is stable, so ties keep
    # snapshot order.
    others = [member for member in grants_by_user if member != self.test_user]
    others.sort(key=lambda member: len(grants_by_user[member]), reverse=True)
    to_check = [self.test_user, *others[: FULL_APPLY_READ_BACK_USER_SAMPLE - 1]]

    for member in to_check:
        self.check_read_back(
            f"live: full apply read-back ({member})",
            member,
            grants_by_user.get(member, set()),
        )
def performance_variants(self) -> list[tuple[str, tuple[str, ...]]]:
    """Return the (label, executable argv) pairs to measure.

    The candidate executable is always included. When a baseline command is
    configured, it is shell-split and listed before the candidate.
    """
    variants: list[tuple[str, tuple[str, ...]]] = [("candidate", self.cli_executable)]
    baseline_command = self.arguments.baseline_command
    if baseline_command:
        variants.insert(0, ("baseline", tuple(shlex.split(baseline_command))))
    return variants
artifact_prefix=self.artifact_prefix, + limit=self.arguments.jaeger_trace_limit, + ) + load_monitor: SourcegraphLoadMonitor | None = None + if self.arguments.monitor_sourcegraph_load: + load_monitor = SourcegraphLoadMonitor( + self.arguments, with_suffix_name(self.artifact_prefix, "-sourcegraph-load") + ) + rows: list[dict[str, object]] = [] + try: + if load_monitor is not None: + load_monitor.start() + for variant_name, variant_executable in self.performance_variants(): + for iteration in range(1, self.arguments.repeat + 1): + rows.extend( + self.run_performance_iteration( + environment, + variant_name, + variant_executable, + iteration, + trace_fetcher, + ) + ) + finally: + if load_monitor is not None: + load_monitor.stop() + self.write_performance_report(rows) + self.check_memory_regressions(rows) + + def run_performance_iteration( + self, + environment: dict[str, str], + variant_name: str, + variant_executable: tuple[str, ...], + iteration: int, + trace_fetcher: JaegerTraceFetcher | None, + ) -> list[dict[str, object]]: + performance_flags = ("--fetch-sg-traces", "--sample-interval", "1") + rows: list[dict[str, object]] = [] + + def measure(case: CliCase) -> CliResult: + result = self.run_cli_case( + case, + environment, + level="performance", + extra_arguments=performance_flags, + executable=variant_executable, + external_sample_interval=self.arguments.external_sample_interval, + ) + jaeger_found = 0 + jaeger_requested = 0 + if trace_fetcher is not None and result.log_path is not None: + jaeger_found, jaeger_requested = trace_fetcher.collect_for_run( + f"{variant_name}-{strip_iteration_suffix(case.name)}", result.log_path + ) + rows.append( + self.performance_row( + case.name, variant_name, iteration, result, jaeger_found, jaeger_requested + ) + ) + return result + + # The dry run is also the baseline snapshot source for the apply + + # restore pair, so selecting the apply implies running the dry run. 
+ want_apply = self.test_selected("perf: set --full apply", "perf: restore full") + want_dry_run = want_apply or self.test_selected("perf: set --full dry-run") + + if want_dry_run: + dry_run = measure( + CliCase(f"perf: set --full dry-run [{iteration}]", ("set", "--full"), 0) + ) + if want_apply and dry_run.run_directory is not None: + baseline_snapshot = dry_run.run_directory / "before.json" + measure( + CliCase( + f"perf: set --full apply [{iteration}]", + ("set", "--full", "--apply", "--no-backup"), + 0, + ) + ) + measure( + CliCase( + f"perf: restore full [{iteration}]", + ( + "restore", + "--restore-path", + str(baseline_snapshot), + "--apply", + "--no-backup", + "--parallelism", + "1", + ), + 0, + ) + ) + for case_name, case in self.fixture_cases_for_mode("performance"): + if self.test_selected(f"performance fixture: {case_name}"): + self.run_fixture_case_on_instance( + case_name, + case, + environment, + level="performance", + run_main_case=measure, + ) + return rows + + def performance_row( + self, + case_name: str, + variant_name: str, + iteration: int, + result: CliResult, + jaeger_found: int, + jaeger_requested: int, + ) -> dict[str, object]: + summary = read_run_log_summary(result.log_path) + duration_ms: float | None = None + peak_rss_mb: float | None = None + if summary.run_record is not None: + duration_ms = float_field(summary.run_record, "duration_ms") + peak_rss_mb = float_field(summary.run_record, "peak_rss_mb") + return { + "case": strip_iteration_suffix(case_name), + "variant": variant_name, + "iteration": iteration, + "exit_code": result.return_code, + "elapsed_seconds": round(result.elapsed_seconds, 3), + "duration_ms": duration_ms if duration_ms is not None else "", + "peak_rss_mb": peak_rss_mb if peak_rss_mb is not None else "", + "sampled_peak_rss_mb": ( + summary.sampled_peak_rss_mb if summary.sampled_peak_rss_mb is not None else "" + ), + "external_peak_rss_mb": ( + round(result.external_peak_rss_mb, 1) + if result.external_peak_rss_mb 
is not None + else "" + ), + "max_num_fds": summary.max_num_fds if summary.max_num_fds is not None else "", + "max_num_threads": ( + summary.max_num_threads if summary.max_num_threads is not None else "" + ), + "max_process_cpu_percent": ( + summary.max_process_cpu_percent + if summary.max_process_cpu_percent is not None + else "" + ), + "jaeger_traces_found": jaeger_found, + "jaeger_traces_requested": jaeger_requested, + "log_path": str(result.log_path) if result.log_path is not None else "", + } + + def write_performance_report(self, rows: list[dict[str, object]]) -> None: + if not rows: + return + report_path = with_suffix_name(self.artifact_prefix, "-results.tsv") + columns = list(rows[0].keys()) + lines = ["\t".join(columns)] + lines.extend("\t".join(str(row[column]) for column in columns) for row in rows) + report_path.write_text("\n".join(lines) + "\n", encoding="utf-8") + log.info("Wrote performance results: %s", report_path) + + log.info("\nMedians per case and variant:") + for case_name, variant_name in sorted( + {(cast(str, row["case"]), cast(str, row["variant"])) for row in rows} + ): + elapsed = performance_median(rows, case_name, variant_name, "elapsed_seconds") + peak_rss = performance_median(rows, case_name, variant_name, "peak_rss_mb") + log.info( + " %-28s %-10s elapsed=%ss peak_rss=%sMiB", + case_name, + variant_name, + f"{elapsed:.1f}" if elapsed is not None else "n/a", + f"{peak_rss:.1f}" if peak_rss is not None else "n/a", + ) + + def check_memory_regressions(self, rows: list[dict[str, object]]) -> None: + """Compare candidate vs baseline median peak RSS against the thresholds.""" + if not self.arguments.baseline_command: + return + threshold_percent = self.arguments.fail_on_memory_regression_percent + threshold_mib = self.arguments.fail_on_memory_regression_mib + for case_name in sorted({cast(str, row["case"]) for row in rows}): + baseline_rss = performance_median(rows, case_name, "baseline", "peak_rss_mb") + candidate_rss = 
def print_summary(self) -> int:
    """Log each failed result and the totals; return the process exit code.

    Returns 1 when at least one recorded result failed, otherwise 0. The
    totals line is logged at ERROR level when anything failed, else INFO.
    """
    log.info("\n%s", "=" * 72)
    total = len(self.results)
    passed = sum(1 for outcome in self.results if outcome.passed)
    failed = total - passed
    for outcome in self.results:
        if outcome.passed:
            continue
        log.error("FAILED [%s] %s — %s", outcome.level, outcome.name, outcome.detail)
    summary_level = logging.ERROR if failed else logging.INFO
    log.log(summary_level, "Summary: %d passed, %d failed, %d total.", passed, failed, total)
    return 1 if failed else 0
Applying the same maps twice is idempotent (zero second-run mutations). +# 4. The final state matches an independent oracle computed directly from +# the mapping layer; unmapped repos are untouched. +# --------------------------------------------------------------------------- + +PROPERTY_GROUPS = ("engineering", "lob1", "admins") +PROPERTY_EMAIL_DOMAINS = ("example.com", "other.test") +PROPERTY_OKTA_SERVICE_ID = "http://www.okta.com/test123" +PROPERTY_OKTA_CLIENT_ID = "https://sourcegraph.test/.auth/saml/metadata" + + +@dataclass(frozen=True) +class PropertyCheckOutcome: + name: str + passed: bool + seconds: float + detail: str = "" + + +def random_fixture_state(rng: random.Random, with_grants: bool) -> FixtureState: + """Generate a random in-memory instance: providers, users, repos, grants.""" + builtin_provider = { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": True, + "configID": "", + } + okta_provider = { + "serviceType": "saml", + "serviceID": PROPERTY_OKTA_SERVICE_ID, + "clientID": PROPERTY_OKTA_CLIENT_ID, + "displayName": "Okta", + "isBuiltin": False, + "configID": "okta", + } + services = [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}", + }, + { + "id": 2, + "kind": "BITBUCKETSERVER", + "displayName": "Bitbucket", + "url": "https://bitbucket.test/", + "config": '{"username": "LOB1-SA1"}', + }, + ] + + usernames: list[str] = [] + users: list[dict[str, Any]] = [] + for index in range(1, rng.randint(4, 9) + 1): + username = f"user{index:02d}" + usernames.append(username) + accounts: list[dict[str, Any]] = [] + if rng.random() < 0.7: + groups = [group for group in PROPERTY_GROUPS if rng.random() < 0.5] + accounts.append( + { + "serviceType": "saml", + "serviceID": PROPERTY_OKTA_SERVICE_ID, + "clientID": PROPERTY_OKTA_CLIENT_ID, + "accountData": { + "Values": {"groups": {"Values": [{"Value": group} for group in 
groups]}} + }, + } + ) + users.append( + { + "id": index, + "username": username, + "builtinAuth": not accounts, + "createdAt": f"2026-01-{index:02d}T00:00:00Z", + "emails": [ + { + "email": f"{username}@{rng.choice(PROPERTY_EMAIL_DOMAINS)}", + "verified": True, + } + ], + "externalAccounts": accounts, + } + ) + + repos: list[dict[str, Any]] = [] + for index in range(1, rng.randint(5, 12) + 1): + service_id = rng.choice((1, 2)) + host = "github.com" if service_id == 1 else "bitbucket.test" + organization = rng.choice(("acme", "lob1")) + grants = [username for username in usernames if rng.random() < 0.25] if with_grants else [] + repos.append( + { + "id": 100 + index, + "name": f"{host}/{organization}/repo{index:02d}", + "externalServiceID": service_id, + "explicitPermissionsUsers": grants, + } + ) + + return cast( + "FixtureState", + { + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [builtin_provider, okta_provider], + "externalServices": services, + "users": users, + "repos": repos, + "pendingBindIDs": [], + }, + ) + + +def random_mapping_rule( + rng: random.Random, state: FixtureState, rule_number: int +) -> dict[str, Any]: + """Generate one random mapping rule referencing the generated state.""" + usernames = [user["username"] for user in state["users"]] + repo_names = [repository["name"] for repository in state["repos"]] + emails = [user["emails"][0]["email"] for user in state["users"]] + + auth_provider_matcher: dict[str, str] = {"configID": "okta"} + if rng.random() < 0.7: + auth_provider_matcher["samlGroup"] = rng.choice(PROPERTY_GROUPS) + user_filter_choices: list[tuple[str, object]] = [ + ("usernames", rng.sample(usernames, rng.randint(1, min(3, len(usernames))))), + ("usernameRegexes", [f"^user0[{rng.randint(1, 9)}-9]"]), + ("emails", rng.sample(emails, rng.randint(1, min(2, len(emails))))), + ("emailRegexes", [f"@{re.escape(rng.choice(PROPERTY_EMAIL_DOMAINS))}$"]), + ("authProvider", auth_provider_matcher), + ] + repo_filter_choices: 
list[tuple[str, object]] = [ + ("names", rng.sample(repo_names, rng.randint(1, min(3, len(repo_names))))), + ( + "nameRegexes", + [f"^{re.escape(rng.choice(('github.com/', 'bitbucket.test/', 'github.com/acme/')))}"], + ), + ("codeHostConnection", {"kind": rng.choice(("GITHUB", "BITBUCKETSERVER"))}), + ] + return { + "name": f"Random rule {rule_number}", + "users": dict(rng.sample(user_filter_choices, rng.randint(1, 2))), + "repos": dict(rng.sample(repo_filter_choices, rng.randint(1, 2))), + } + + +def run_set_full_in_memory( + state: FixtureState, rules: list[dict[str, Any]], maps_path: Path +) -> tuple[FixtureState, int]: + """Run the real `set --full --apply` code path against an in-memory instance. + + Backups stay enabled (redirected into the maps temp directory) so the runs + exercise the real snapshot capture and the short-circuit filter that skips + repos already at the desired state. + """ + import src_py_lib as src + import yaml + + from src_auth_perms_sync import cli + from src_auth_perms_sync.shared import backups + from tests.e2e.case_runner import FakeSourcegraphClient + + maps_path.write_text(yaml.safe_dump({"maps": rules}, sort_keys=False), encoding="utf-8") + client = FakeSourcegraphClient(state) + config = cli.Config( + src_endpoint=state["endpoint"], + src_access_token="invariant-token", + ).model_copy( + update={ + "maps_path": maps_path, + "apply": True, + "no_backup": False, + "parallelism": 1, + "full": True, + } + ) + command = cli.resolve_command("set", config) + artifacts_directory = maps_path.parent / f"artifacts-{time.monotonic_ns()}" + with ( + backups.run_artifacts_context(artifacts_directory, backups.backup_timestamp()), + ThreadPoolExecutor(max_workers=1) as worker_pool, + ): + cli.run_command(config, command, cast("src.SourcegraphClient", client), worker_pool) + return client.export_state(), client.mutation_count + + +def grant_pairs(state: FixtureState) -> set[tuple[int, str]]: + return { + (repository["id"], username) + for 
repository in state["repos"] + for username in repository["explicitPermissionsUsers"] + } + + +def oracle_expected_grants(state: FixtureState, rules: list[dict[str, Any]]) -> dict[int, set[str]]: + """Independently compute per-repo grants straight from the mapping layer.""" + import src_py_lib as src + + from src_auth_perms_sync.permissions import mapping + from src_auth_perms_sync.permissions import types as permission_types + from src_auth_perms_sync.shared import types as shared_types + + users = [ + cast( + "shared_types.User", + { + "id": f"user-{user['id']}", + "username": user["username"], + "builtinAuth": user["builtinAuth"], + "externalAccounts": {"nodes": list(user["externalAccounts"])}, + "emails": list(user["emails"]), + }, + ) + for user in state["users"] + ] + services_by_id = { + service["id"]: cast( + "permission_types.ExternalService", + { + "id": src.encode_sourcegraph_node_id("ExternalService", service["id"]), + "kind": service["kind"], + "displayName": service["displayName"], + "url": service["url"], + "config": service["config"], + }, + ) + for service in state["externalServices"] + } + repos_by_service: dict[int, list[permission_types.Repository]] = {} + all_repos_by_id: dict[str, permission_types.Repository] = {} + for repository in state["repos"]: + graphql_repository: permission_types.Repository = { + "id": src.encode_repository_id(repository["id"]), + "name": repository["name"], + } + repos_by_service.setdefault(repository["externalServiceID"], []).append(graphql_repository) + all_repos_by_id[graphql_repository["id"]] = graphql_repository + + expected: dict[int, set[str]] = {} + for rule in rules: + matched_users = mapping.resolve_users( + cast("permission_types.UserSelector", rule["users"]), + users, + state["authProviders"], + None, + ) + if not matched_users: + continue + matched_repos = mapping.resolve_repos( + cast("permission_types.RepositorySelector", rule["repos"]), + services_by_id, + repos_by_service, + all_repos_by_id, + ) + for 
def check_union_across_rules(rng: random.Random, maps_path: Path) -> str:
    """Check that two rules applied together grant the union of each applied alone.

    Returns an empty string when the invariant holds, otherwise a description
    of up to five unexpected and five missing (repo id, username) pairs.
    """
    state = random_fixture_state(rng, with_grants=False)
    rule_one = random_mapping_rule(rng, state, 1)
    rule_two = random_mapping_rule(rng, state, 2)
    combined_state, _ = run_set_full_in_memory(state, [rule_one, rule_two], maps_path)
    only_one_state, _ = run_set_full_in_memory(state, [rule_one], maps_path)
    only_two_state, _ = run_set_full_in_memory(state, [rule_two], maps_path)
    expected = grant_pairs(only_one_state) | grant_pairs(only_two_state)
    actual = grant_pairs(combined_state)
    if actual == expected:
        return ""
    extra = sorted(actual - expected)[:5]
    missing = sorted(expected - actual)[:5]
    return (
        "combined grants are not the union of per-rule grants: "
        f"extra={extra} missing={missing}"
    )


def with_extra_user_filter(
    rng: random.Random, state: FixtureState, rule: dict[str, Any]
) -> dict[str, Any] | None:
    """Return a copy of ``rule`` with one more user filter, or None if none is free.

    The extra filter is chosen at random among ``usernames``,
    ``usernameRegexes`` and ``emails`` fields the rule does not already use.
    The input rule and its ``users`` selector are not mutated.
    """
    known_usernames = [user["username"] for user in state["users"]]
    narrowed_selector = dict(cast("dict[str, Any]", rule["users"]))
    # Both username draws happen up front, even for candidates that end up
    # discarded, so the random stream is consumed the same way on every call.
    candidates: list[tuple[str, list[str]]] = [
        ("usernames", [rng.choice(known_usernames)]),
        ("usernameRegexes", ["^user0[13579]"]),
        ("emails", [f"{rng.choice(known_usernames)}@example.com"]),
    ]
    unused = [candidate for candidate in candidates if candidate[0] not in narrowed_selector]
    if not unused:
        return None
    field_name, value = rng.choice(unused)
    narrowed_selector[field_name] = value
    narrowed_rule = dict(rule)
    narrowed_rule["users"] = narrowed_selector
    return narrowed_rule
def check_apply_idempotency(rng: random.Random, maps_path: Path) -> str:
    """Check that re-applying the same rule to its own result changes nothing.

    Returns an empty string on success. Fails when the second run performs
    any mutation or ends with a different grant set than the first run.
    """
    initial_state = random_fixture_state(rng, with_grants=True)
    rules = [random_mapping_rule(rng, initial_state, 1)]
    after_first, _ = run_set_full_in_memory(initial_state, rules, maps_path)
    after_second, repeat_mutations = run_set_full_in_memory(after_first, rules, maps_path)
    if repeat_mutations != 0:
        return f"second identical run performed {repeat_mutations} mutation(s)"
    if grant_pairs(after_second) != grant_pairs(after_first):
        return "second identical run changed the grant set"
    return ""


def check_oracle_equivalence(rng: random.Random, maps_path: Path) -> str:
    """Check the applied state against grants computed by ``oracle_expected_grants``.

    Repos the oracle maps must end with exactly the oracle's users; repos it
    does not map must keep the grants they had before the run. Returns an
    empty string on success, otherwise the first mismatch found.
    """
    state = random_fixture_state(rng, with_grants=True)
    rules = [random_mapping_rule(rng, state, 1), random_mapping_rule(rng, state, 2)]
    final_state, _ = run_set_full_in_memory(state, rules, maps_path)
    expected_by_repo = oracle_expected_grants(state, rules)
    before_by_repo = {
        repository["id"]: set(repository["explicitPermissionsUsers"])
        for repository in state["repos"]
    }
    for repository in final_state["repos"]:
        actual_users = set(repository["explicitPermissionsUsers"])
        expected_users = expected_by_repo.get(repository["id"])
        if expected_users is None:
            # Not matched by any rule: the run must have left it alone.
            if actual_users != before_by_repo[repository["id"]]:
                return f"unmapped repo {repository['name']} changed: {sorted(actual_users)}"
            continue
        if actual_users != expected_users:
            return (
                f"repo {repository['name']}: expected {sorted(expected_users)}, "
                f"got {sorted(actual_users)}"
            )
    return ""
# ---------------------------------------------------------------------------
# Live fixture-case helpers: identity translation, seed/cleanup snapshots
# ---------------------------------------------------------------------------

RESTORE_SUCCESS_MARKERS = (
    "VALIDATION OK",
    "Restore done",
    "Nothing to restore",
)
EXACT_REPOSITORY_SELECTOR_FIELDS = {"names"}


def fixture_grants(case_name: str, file_name: str) -> dict[str, set[str]] | None:
    """Load one fixture state file as ``{repo name: usernames with explicit grants}``.

    Returns None when ``FIXTURES_DIR / case_name / file_name`` is not a file.
    """
    state_path = FIXTURES_DIR / case_name / file_name
    if not state_path.is_file():
        return None
    state = cast("dict[str, Any]", json.loads(state_path.read_text(encoding="utf-8")))
    grants: dict[str, set[str]] = {}
    for repository in cast("list[dict[str, Any]]", state["repos"]):
        repository_name = cast(str, repository["name"])
        grants[repository_name] = set(cast("list[str]", repository["explicitPermissionsUsers"]))
    return grants
+ + Mutating instance runs must be able to enumerate every repo a rule can + touch, so they capture and restore exactly that set. Exact `names:` + selectors enumerate themselves; any other repo selector (regexes, + code-host matchers) requires the case to declare `live.involvedRepos` + covering everything the selector can match — undeclared matches are + mutated without restore and only detected by the canary checks. + + User-side selectors are unrestricted: whatever users a rule matches, the + mutations stay confined to the involved repos, and the post-run state + verification catches wrong user matching. + """ + import yaml + + maps_text = (FIXTURES_DIR / case_name / "maps.yaml").read_text(encoding="utf-8") + loaded = cast("dict[str, Any]", yaml.safe_load(maps_text)) + rule_repository_names: set[str] = set() + for rule in cast("list[dict[str, Any]]", loaded.get("maps") or []): + repository_selector = cast("dict[str, Any]", rule.get("repos") or {}) + non_exact_fields = sorted(set(repository_selector) - EXACT_REPOSITORY_SELECTOR_FIELDS) + if non_exact_fields and not has_declared_repository_names: + return ( + rule_repository_names, + f"rule {rule.get('name')!r} uses non-exact repo selectors " + f"{non_exact_fields}; declare live.involvedRepos covering every repo " + f"they can match, or use exact names", + ) + rule_repository_names.update(cast("list[str]", repository_selector.get("names") or [])) + return (rule_repository_names, "") + + +def write_state_snapshot( + path: Path, endpoint: str, grants: dict[str, tuple[int, list[str]]] +) -> None: + """Write a restore-compatible snapshot file describing exact repo states.""" + repos = { + str(repository_id): {"name": repository_name, "users": usernames} + for repository_name, (repository_id, usernames) in sorted(grants.items()) + } + users_with_grants = {username for _, usernames in grants.values() for username in usernames} + snapshot: dict[str, Any] = { + "schema_version": 5, + "captured_at": 
datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds"), + "endpoint": endpoint, + "bindID_mode": "USERNAME", + "config_file": None, + "config_sha256": None, + "pending_bindIDs": [], + "stats": { + "total_users_scanned": len(users_with_grants), + "users_with_explicit_grants": len(users_with_grants), + "repos_with_explicit_grants": sum(1 for _, usernames in grants.values() if usernames), + "total_grants": sum(len(usernames) for _, usernames in grants.values()), + }, + "repos": repos, + } + path.write_text(json.dumps(snapshot, indent=2) + "\n", encoding="utf-8") + + +def restore_arguments(snapshot_path: Path) -> tuple[str, ...]: + # Parallelism 8: the dominant cost of seed/cleanup restores is the full + # explicit-permissions capture (10k users in batches), which serializes + # painfully at parallelism 1; the mutation counts here are tiny. + return ( + "restore", + "--restore-path", + str(snapshot_path), + "--apply", + "--no-backup", + "--parallelism", + "8", + ) + + +def decode_repository_node_id(graphql_id: str) -> int: + """Decode a base64 GraphQL Repository node ID to its integer database ID.""" + decoded = base64.b64decode(graphql_id, validate=True).decode() + kind, _, database_id = decoded.partition(":") + if kind != "Repository": + raise ValueError(f"not a Repository node ID: {decoded!r}") + return int(database_id) + + +def mutations_succeeded_from_log(log_path: Path | None) -> int | None: + """Return the last mutations_succeeded count from a run's structured log.""" + if log_path is None or not log_path.is_file(): + return None + succeeded: int | None = None + with log_path.open(encoding="utf-8") as log_file: + for line in log_file: + if '"mutations_succeeded"' not in line: + continue + try: + record = cast("dict[str, Any]", json.loads(line)) + except json.JSONDecodeError: + continue + value = record.get("mutations_succeeded") + if isinstance(value, int): + succeeded = value + return succeeded + + +def strip_iteration_suffix(case_name: str) -> str: 
+ return re.sub(r" \[\d+\]$", "", case_name) + + +def performance_median( + rows: list[dict[str, object]], case_name: str, variant_name: str, column: str +) -> float | None: + values = [ + float(value) + for row in rows + if row["case"] == case_name and row["variant"] == variant_name + for value in (row.get(column),) + if isinstance(value, (int, float)) and not isinstance(value, bool) + ] + return statistics.median(values) if values else None + + +# --------------------------------------------------------------------------- +# Jaeger trace collection (performance level) +# +# After each performance case, the CLI run's structured log is scanned for +# GraphQL requests with Sourcegraph trace metadata (the CLI is run with +# --fetch-sg-traces so the server retains traces). The slowest traces are +# fetched from Sourcegraph's Jaeger API and written to the test run directory: +# summaries to jaeger-trace-summaries.jsonl, full traces under jaeger-traces/. +# --------------------------------------------------------------------------- + +JAEGER_INITIAL_DELAY_SECONDS = 15.0 +JAEGER_RETRY_DELAYS_SECONDS = (5.0, 10.0, 20.0, 30.0, 60.0) +JAEGER_FETCH_PARALLELISM = 4 + + +def string_headers(headers: object) -> dict[str, str]: + if not isinstance(headers, dict): + return {} + values: dict[str, str] = {} + for header_name, value in cast("dict[object, object]", headers).items(): + if not isinstance(header_name, str): + continue + if isinstance(value, str): + values[header_name] = value + elif isinstance(value, list): + string_values = [item for item in cast("list[object]", value) if isinstance(item, str)] + if string_values: + values[header_name] = string_values[0] + return values + + +def header_value(headers: dict[str, str], name: str) -> str | None: + lower_name = name.lower() + for header_name, value in headers.items(): + if header_name.lower() == lower_name: + return value + return None + + +def trace_id_from_traceparent(traceparent: str | None) -> str | None: + if 
traceparent is None: + return None + parts = traceparent.split("-") + if len(parts) != 4: + return None + trace_id = parts[1] + if len(trace_id) != 32 or not all(character in "0123456789abcdef" for character in trace_id): + return None + return trace_id + + +def graphql_trace_request_from_record(record: dict[str, Any]) -> dict[str, Any] | None: + """Return Sourcegraph trace metadata from one structured http_request record.""" + import src_py_lib as src + from src_py_lib.clients.sourcegraph import sourcegraph_trace_from_headers + + if record.get("event") != "http_request" or record.get("phase") != "end": + return None + if not str(record.get("url", "")).endswith("/.api/graphql"): + return None + request_headers = string_headers(record.get("request_headers")) + response_headers = string_headers(record.get("response_headers")) + trace = sourcegraph_trace_from_headers(response_headers, request_headers) + if trace is None: + trace_id = trace_id_from_traceparent(header_value(request_headers, "traceparent")) + if trace_id is None: + return None + trace = src.SourcegraphTrace( + trace_id=trace_id, + trace_url=header_value(response_headers, "x-trace-url"), + ) + return trace.to_json() | { + "duration_ms": float_field(record, "duration_ms") or 0.0, + "timestamp": record.get("ts"), + "status": record.get("status"), + "status_code": record.get("status_code"), + "error_type": record.get("error_type"), + } + + +def trace_requests_from_log(log_path: Path, limit: int) -> list[dict[str, Any]]: + """Return the slowest unique GraphQL trace requests from one run log.""" + requests_by_trace_id: dict[str, dict[str, Any]] = {} + with log_path.open(encoding="utf-8") as log_file: + for line in log_file: + if not line.strip(): + continue + try: + record = cast("dict[str, Any]", json.loads(line)) + except json.JSONDecodeError: + continue + trace_request = graphql_trace_request_from_record(record) + if trace_request is None: + continue + trace_id = str(trace_request["trace_id"]) + existing = 
requests_by_trace_id.get(trace_id) + if existing is None or cast(float, trace_request["duration_ms"]) > cast( + float, existing["duration_ms"] + ): + requests_by_trace_id[trace_id] = trace_request + slowest_first = sorted( + requests_by_trace_id.values(), + key=lambda trace_request: cast(float, trace_request["duration_ms"]), + reverse=True, + ) + return slowest_first[:limit] + + +class JaegerTraceFetcher: + """Fetch the slowest Sourcegraph Jaeger traces for each performance case.""" + + def __init__(self, endpoint: str, access_token: str, artifact_prefix: Path, limit: int) -> None: + import src_py_lib as src + + self.limit = limit + self.summaries_path = with_suffix_name(artifact_prefix, "-jaeger-traces.jsonl") + self.traces_directory = with_suffix_name(artifact_prefix, "-jaeger-traces") + http = src.HTTPClient( + user_agent="src-auth-perms-sync-tests/0.1 (+python)", + max_attempts=1, + max_connections=JAEGER_FETCH_PARALLELISM, + ) + self._client = src.SourcegraphClient(endpoint=endpoint, token=access_token, http=http) + + def collect_for_run(self, case_label: str, log_path: Path) -> tuple[int, int]: + """Fetch traces for one run. 
Returns (fetched, requested).""" + if not log_path.is_file(): + return (0, 0) + trace_requests = trace_requests_from_log(log_path, self.limit) + if not trace_requests: + return (0, 0) + log.info( + "Fetching %d slowest Jaeger trace(s) for %s (waiting %.0fs for trace ingestion) ...", + len(trace_requests), + case_label, + JAEGER_INITIAL_DELAY_SECONDS, + ) + time.sleep(JAEGER_INITIAL_DELAY_SECONDS) + fetched = 0 + + def fetch_one(trace_request: dict[str, Any]) -> dict[str, Any]: + return self._fetch_one(case_label, trace_request) + + with ThreadPoolExecutor(max_workers=JAEGER_FETCH_PARALLELISM) as fetch_pool: + summaries = list(fetch_pool.map(fetch_one, trace_requests)) + for summary in summaries: + if summary.get("jaeger_found") is True: + fetched += 1 + self._append_summary(summary) + self._log_summary(summary) + return (fetched, len(trace_requests)) + + def _fetch_one(self, case_label: str, trace_request: dict[str, Any]) -> dict[str, Any]: + import src_py_lib as src + from src_py_lib.clients.sourcegraph import summarize_jaeger_trace + + trace = src.SourcegraphTrace( + trace_id=str(trace_request["trace_id"]), + span_id=optional_string(trace_request.get("span_id")), + trace_url=optional_string(trace_request.get("trace_url")), + parent_trace_id=optional_string(trace_request.get("parent_trace_id")), + parent_span_id=optional_string(trace_request.get("parent_span_id")), + ) + try: + jaeger_trace = self._client.fetch_jaeger_trace( + trace.trace_id, + retry_delays_seconds=JAEGER_RETRY_DELAYS_SECONDS, + ) + summary = summarize_jaeger_trace(trace, jaeger_trace).to_json() + trace_path = self._write_complete_trace(case_label, trace_request, jaeger_trace) + if trace_path is not None: + summary["jaeger_trace_path"] = str(trace_path) + return trace_request | summary | {"case": case_label} + except Exception as exception: # noqa: BLE001 - keep evidence collection alive. 
+ return trace_request | { + "case": case_label, + "jaeger_found": False, + "error": f"{type(exception).__name__}: {exception}", + } + + def _write_complete_trace( + self, case_label: str, trace_request: dict[str, Any], jaeger_trace: dict[str, Any] + ) -> Path | None: + trace_id = str(trace_request["trace_id"]) + path = self.traces_directory / case_label / f"{trace_id}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps( + {"trace_request": trace_request, "jaeger_trace": jaeger_trace}, + indent=2, + sort_keys=True, + ) + + "\n", + encoding="utf-8", + ) + return path + + def _append_summary(self, summary: dict[str, Any]) -> None: + self.summaries_path.parent.mkdir(parents=True, exist_ok=True) + with self.summaries_path.open("a", encoding="utf-8") as summaries_file: + summaries_file.write(json.dumps(summary, sort_keys=True, default=str) + "\n") + + def _log_summary(self, summary: dict[str, Any]) -> None: + duration_ms = float(cast("int | float", summary.get("duration_ms") or 0)) + if summary.get("jaeger_found") is not True: + log.info(" %0.0fms %s: %s", duration_ms, summary.get("trace_id"), summary.get("error")) + return + log.info( + " %0.0fms %s: %s span(s)", + duration_ms, + summary.get("trace_id"), + summary.get("span_count", 0), + ) + + +def optional_string(value: object) -> str | None: + return value if isinstance(value, str) else None + + +# --------------------------------------------------------------------------- +# Sourcegraph load monitor (performance level, optional) +# +# Python port of dev/memory-efficiency-monitor-sourcegraph.sh: samples +# Sourcegraph pod and Postgres load via kubectl while performance cases run. 
+# --------------------------------------------------------------------------- + +POSTGRES_ACTIVITY_SQL = """ +select + pid, + now() - query_start as age, + state, + wait_event_type, + wait_event, + left(query, 220) as query +from pg_stat_activity +where state <> 'idle' +order by age desc +limit 30; + +select + wait_event_type, + wait_event, + state, + count(*) +from pg_stat_activity +group by 1,2,3 +order by count(*) desc; + +select + locktype, + mode, + granted, + count(*) +from pg_locks +group by 1,2,3 +order by count(*) desc; +""" + +POSTGRES_STATEMENTS_SETUP_SQL = """ +select current_database(), current_user; +show shared_preload_libraries; +show track_io_timing; +create extension if not exists pg_stat_statements; +select pg_stat_statements_reset(); +""" + +POSTGRES_STATEMENTS_SQL = """ +select + calls, + round(total_exec_time::numeric, 1) as total_ms, + round(mean_exec_time::numeric, 1) as mean_ms, + rows, + left(query, 260) as query +from pg_stat_statements +order by total_exec_time desc +limit 25; +""" + +POD_PROCESS_SAMPLE_SCRIPT = """ +echo "--- top CPU ---" +ps auxww | sort -nrk3 | head -30 +echo "--- top RSS ---" +ps auxww | sort -nrk4 | head -30 +""" + + +class SourcegraphLoadMonitor: + """Sample Sourcegraph pod and Postgres load via kubectl in background threads.""" + + def __init__(self, arguments: TestArguments, output_directory: Path) -> None: + self.arguments = arguments + self.output_directory = output_directory + self._stop = threading.Event() + self._threads: list[threading.Thread] = [] + + def start(self) -> None: + self.output_directory.mkdir(parents=True, exist_ok=True) + log.info("Starting Sourcegraph load monitor: %s", self.output_directory) + self._run_psql("postgres-statements-setup.log", POSTGRES_STATEMENTS_SETUP_SQL) + self._snapshot_pod_descriptions() + samplers: list[tuple[str, float, Callable[[], None]]] = [ + ("kubectl-top", self.arguments.monitor_interval_seconds, self._sample_kubectl_top), + ("processes", 
self.arguments.monitor_interval_seconds, self._sample_pod_processes), + ( + "postgres-activity", + self.arguments.monitor_postgres_interval_seconds, + self._sample_postgres_activity, + ), + ( + "postgres-statements", + self.arguments.monitor_statements_interval_seconds, + self._sample_postgres_statements, + ), + ] + for name, interval_seconds, sample in samplers: + thread = threading.Thread( + target=self._loop, + args=(float(interval_seconds), sample), + name=f"SourcegraphLoadMonitor-{name}", + daemon=True, + ) + thread.start() + self._threads.append(thread) + + def stop(self) -> None: + self._stop.set() + for thread in self._threads: + thread.join(timeout=10.0) + self._snapshot_pod_descriptions() + log.info("Stopped Sourcegraph load monitor. Output: %s", self.output_directory) + + def _loop(self, interval_seconds: float, sample: Callable[[], None]) -> None: + while not self._stop.is_set(): + sample() + if self._stop.wait(interval_seconds): + return + + def _append(self, file_name: str, title: str, text: str) -> None: + timestamp = datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds") + with (self.output_directory / file_name).open("a", encoding="utf-8") as output_file: + output_file.write(f"\n===== {timestamp} {title} =====\n{text}") + + def _run_capture(self, command: list[str], stdin_text: str | None = None) -> str: + try: + completed = subprocess.run( + command, + input=stdin_text, + capture_output=True, + text=True, + timeout=60, + check=False, + ) + except (OSError, subprocess.SubprocessError) as error: + return f"command failed: {type(error).__name__}: {error}\n" + return completed.stdout + completed.stderr + + def _kubectl(self, *kubectl_arguments: str) -> list[str]: + return ["kubectl", "-n", self.arguments.monitor_namespace, *kubectl_arguments] + + def _sample_kubectl_top(self) -> None: + output = self._run_capture(self._kubectl("top", "pods", "--containers")) + self._append("kubectl-top-pods-containers.log", "kubectl top pods --containers", output) + + def _sample_pod_processes(self) -> 
None: + for label, target in ( + ("frontend", self.arguments.monitor_frontend_target), + ("postgres", self.arguments.monitor_postgres_target), + ): + output = self._run_capture( + self._kubectl("exec", target, "--", "sh", "-lc", POD_PROCESS_SAMPLE_SCRIPT) + ) + self._append(f"{label}-processes.log", f"{target} process CPU/RSS", output) + + def _run_psql(self, file_name: str, sql: str) -> None: + output = self._run_capture( + self._kubectl( + "exec", + "-i", + self.arguments.monitor_postgres_target, + "--", + "sh", + "-lc", + f"{self.arguments.monitor_psql_command} -P pager=off", + ), + stdin_text=sql, + ) + self._append(file_name, "psql", output) + + def _sample_postgres_activity(self) -> None: + self._run_psql("postgres-activity.log", POSTGRES_ACTIVITY_SQL) + + def _sample_postgres_statements(self) -> None: + self._run_psql("postgres-statements.log", POSTGRES_STATEMENTS_SQL) + + def _snapshot_pod_descriptions(self) -> None: + for target in ( + self.arguments.monitor_frontend_target, + self.arguments.monitor_postgres_target, + ): + output = self._run_capture(self._kubectl("describe", target)) + self._append("pod-descriptions.log", f"kubectl describe {target}", output) + + +def main() -> None: + arguments = parse_arguments() + stamp = datetime.datetime.now(datetime.UTC).strftime("%Y%m%d-%H%M%S") + artifact_prefix = TEST_LOGS_DIR / f"{stamp}-{arguments.level}" + log_path = with_suffix_name(artifact_prefix, ".log") + configure_logging(log_path, quiet=arguments.quiet) + if arguments.quiet: + # The console only shows warnings and failures in quiet mode; the log + # file path must stay visible. + print(f"Writing test output to {log_path}") + log.info("Writing test output to %s", log_path) + + suite = TestSuite(arguments=arguments, artifact_prefix=artifact_prefix) + + if arguments.update_golden: + suite.run_fixture_checks(update_golden=True) + log.info("\nGolden files regenerated. 
Review `git diff tests/e2e/fixtures/` carefully.") + sys.exit(suite.print_summary()) + + if arguments.level == "local": + suite.run_toolchain_gates() + suite.run_fixture_checks(update_golden=False) + suite.run_property_checks() + elif arguments.level == "live": + suite.run_live() + else: + suite.run_performance() + + exit_code = suite.print_summary() + log.info("Full log: %s", log_path) + sys.exit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/tests/tests.yaml b/tests/tests.yaml new file mode 100644 index 0000000..13cb5c0 --- /dev/null +++ b/tests/tests.yaml @@ -0,0 +1,670 @@ +# Registry of end-to-end test cases. +# +# Each key under `cases:` may have a matching directory in tests/e2e/fixtures/ +# holding the case's state files. The directory is only required when the +# case actually uses files: +# before.json instance state before the run (providers, services, users, +# repos with explicitPermissionsUsers). Required for local +# mode and for mutating (--apply) live/performance runs. +# maps.yaml the mapping rules under test. Required for set commands +# that do not pass their own --maps-path. +# after.json expected state after the run (omit when state must NOT +# change: no-op cases and expected-error cases) +# A read-only non-set command (e.g. `get --users x`) needs no directory. +# +# Case fields: +# description What the case proves. +# modes Where the case runs: local, live, performance. +# local run the CLI in-process against an +# in-memory instance built from before.json; +# assert the full resulting state +# live run cliCommand against the .env test +# instance. Read-only commands just assert +# exit code and expectedErrors. Mutating +# commands (--apply) first seed the +# before.json state onto the involved +# repos, run, verify the result with an +# independent GraphQL read-back, and +# finally restore the original state. 
+# Live cases must use REAL instance +# users/repos in their fixture files, and +# exact selectors only (users: +# usernames/emails, repos: names). +# performance same as live, but timed and measured +# (traces, RSS sampling, TSV row) +# cliCommand CLI arguments to run, parsed by the real argument +# parser. For set commands, --maps-path CASE_DIR/maps.yaml +# is appended automatically when not supplied. +# importConfig Optional. Also run the case through the Python import +# API (cli.Config + cli.resolve_command + cli.run_command) +# like a library consumer would. Keys are Config field +# names plus `command`; maps_path defaults to the case +# directory's maps.yaml for set commands. +# expectedMutations Exact number of permission mutations the run must make. +# expectedErrors The command must FAIL, every substring must appear in +# the failure, and state must be unchanged. +# live.involvedRepos For mutating instance runs: extra repos to +# capture, seed, verify, and restore beyond the fixture +# state and exact rule names. Required when rules use +# non-exact repo selectors (regexes): declare every repo +# the selector can match. Involved repos absent from +# after.json are canaries that must read back unchanged. +# live.usersWithoutOtherGrants Preflight: these users must hold no +# explicit grants outside the involved repos, making +# globally-scoped modes (--users-without-explicit-perms) +# deterministic. +# expectedExitCode Either of these makes the case replay-style: assert the +# expectedOutput exit code and output substrings instead of instance +# state. Locally, replay cases run the real argument +# parser in-process and need no fixture files. +# +# `{user}` in a cliCommand resolves to the configured --user on the test +# instance (live/performance modes only). + +cases: + + # ── Local parse replays: argument validation, in-process, no files (fastest) ── + reject-bare-invocation: + description: A bare invocation without a command prints usage and exits 2. 
+ modes: + - local + cliCommand: "" + expectedExitCode: 2 + expectedOutput: + - "the following arguments are required: COMMAND" + + reject-unknown-command: + description: An unknown subcommand is rejected with the valid choices. + modes: + - local + cliCommand: bogus + expectedExitCode: 2 + expectedOutput: + - "invalid choice: 'bogus'" + + reject-two-commands: + description: Two subcommands in one invocation are rejected. + modes: + - local + cliCommand: get set + expectedExitCode: 2 + expectedOutput: + - unrecognized arguments + + reject-get-apply: + description: get is read-only; --apply is rejected. + modes: + - local + cliCommand: get --apply + expectedExitCode: 2 + expectedOutput: + - unrecognized arguments + + reject-get-full: + description: --full requires the set command. + modes: + - local + cliCommand: get --full + expectedExitCode: 2 + expectedOutput: + - unrecognized arguments + + reject-malformed-date: + description: dates must match YYYY-MM-DD before any network call. + modes: + - local + cliCommand: get --created-after 2026-1-01 + expectedExitCode: 2 + expectedOutput: + - string_pattern_mismatch + + reject-user-filter-conflict: + description: user filters are mutually exclusive. + modes: + - local + cliCommand: get --users test_user_09991 --users-without-explicit-perms + expectedExitCode: 2 + expectedOutput: + - choose only one of --users + + reject-user-and-repo-filters: + description: user filters and repo filters cannot be combined. + modes: + - local + cliCommand: get --users test_user_09991 --repos test-repo-49981 + expectedExitCode: 2 + expectedOutput: + - choose either user filters or repo filters + + reject-repo-filter-conflict: + description: repo filters are mutually exclusive. 
+ modes: + - local + cliCommand: get --repos test-repo-49981 --repos-without-explicit-perms + expectedExitCode: 2 + expectedOutput: + - choose only one of --repos + + reject-repos-created-after-malformed: + description: repo creation dates must match YYYY-MM-DD at parse time. + modes: + - local + cliCommand: get --repos-created-after 2026-1-01 + expectedExitCode: 2 + expectedOutput: + - string_pattern_mismatch + + reject-get-removed-repositories-created-after: + description: >- + The removed --repositories-created-after spelling stays removed; the + flag is --repos-created-after. + modes: + - local + cliCommand: get --repositories-created-after 2026-01-01 + expectedExitCode: 2 + expectedOutput: + - "unrecognized arguments: --repositories-created-after" + + reject-verbosity-conflict: + description: the --verbose and --quiet log-level aliases are mutually exclusive. + modes: + - local + cliCommand: get --users username_doesnt_exist_01 --verbose --quiet + expectedExitCode: 2 + expectedOutput: + - choose only one of --verbose/-v, --quiet/-q, or --silent/-s + + reject-bare-set: + description: set requires an explicit mode flag. + modes: + - local + cliCommand: set + expectedExitCode: 2 + expectedOutput: + - set requires one of --full + + reject-set-full-and-users: + description: set modes are mutually exclusive. + modes: + - local + cliCommand: set --full --users test_user_09991 + expectedExitCode: 2 + expectedOutput: + - choose at most one + + reject-set-user-filter-conflict: + description: set user filters are mutually exclusive. + modes: + - local + cliCommand: set --users test_user_09991 --users-without-explicit-perms + expectedExitCode: 2 + expectedOutput: + - choose only one of --users + + reject-set-full-and-created-after: + description: full overwrite cannot be combined with the additive date filter. 
+ modes: + - local + cliCommand: set --full --created-after 2099-01-01 + expectedExitCode: 2 + expectedOutput: + - "--full cannot be combined with --created-after" + + reject-set-restore-path: + description: "--restore-path belongs to restore; set does not accept it." + modes: + - local + cliCommand: set --restore-path definitely-missing.json + expectedExitCode: 2 + expectedOutput: + - "unrecognized arguments: --restore-path" + + reject-bare-restore: + description: restore requires a snapshot path. + modes: + - local + cliCommand: restore + expectedExitCode: 2 + expectedOutput: + - restore requires --restore-path + + reject-restore-with-users: + description: restore does not take user filters. + modes: + - local + cliCommand: restore --restore-path definitely-missing.json --users test_user_09991 + expectedExitCode: 2 + expectedOutput: + - unrecognized arguments + + reject-restore-repos: + description: restore does not take repo filters. + modes: + - local + cliCommand: restore --restore-path definitely-missing.json --repos test-repo-49981 + expectedExitCode: 2 + expectedOutput: + - "unrecognized arguments: --repos" + + reject-restore-sync-saml-orgs: + description: "--sync-saml-orgs belongs to set; restore does not accept it." + modes: + - local + cliCommand: restore --restore-path definitely-missing.json --sync-saml-orgs + expectedExitCode: 2 + expectedOutput: + - "unrecognized arguments: --sync-saml-orgs" + + reject-sync-saml-orgs-created-after: + description: sync-saml-orgs does not take user filters. + modes: + - local + cliCommand: sync-saml-orgs --created-after 2099-01-01 + expectedExitCode: 2 + expectedOutput: + - unrecognized arguments + + reject-sync-saml-orgs-users: + description: sync-saml-orgs does not take a user list. 
+ modes: + - local + cliCommand: sync-saml-orgs --users username_doesnt_exist_01 + expectedExitCode: 2 + expectedOutput: + - "unrecognized arguments: --users" + + reject-sync-saml-orgs-full: + description: "--full belongs to set; sync-saml-orgs does not accept it." + modes: + - local + cliCommand: sync-saml-orgs --full + expectedExitCode: 2 + expectedOutput: + - "unrecognized arguments: --full" + + reject-sync-saml-orgs-restore-path: + description: "--restore-path belongs to restore; sync-saml-orgs does not accept it." + modes: + - local + cliCommand: sync-saml-orgs --restore-path definitely-missing.json + expectedExitCode: 2 + expectedOutput: + - "unrecognized arguments: --restore-path" + + # ── Local state cases: full CLI runs against an in-memory instance ── + and-filters-intersect: + description: >- + Multiple user filters AND together: both users are in the SAML group, + but only test_user_09991 matches the email filter. + modes: + - local + cliCommand: set --full --apply --no-backup --parallelism 1 + expectedMutations: 1 + + regex-filters-scope: + description: >- + Email and repo-name regex filters scope grants to matching users and + repos only. Live, the declared involvedRepos cover the full closed + regex range; the unmatched repos in it are canaries that must read + back unchanged, which catches a widened repo regex. + modes: + - local + - live + - performance + cliCommand: set --full --apply --no-backup --parallelism 1 + live: + involvedRepos: + - test-repo-49980 + - test-repo-49981 + - test-repo-49982 + - test-repo-49983 + - test-repo-49984 + - test-repo-49985 + - test-repo-49986 + - test-repo-49987 + - test-repo-49988 + - test-repo-49989 + expectedMutations: 2 + + saml-group-filter: + description: >- + authProvider samlGroup filter grants Bitbucket repos only to users whose + SAML assertion includes the group. 
+ modes: + - local + cliCommand: set --full --apply --no-backup --parallelism 1 + expectedMutations: 2 + + set-users-created-after: + description: >- + createdAfter mode additively grants mapped repos to users created + on/after the date, preserving existing grants. + modes: + - local + cliCommand: >- + set --created-after 2026-02-01 --apply --no-backup --parallelism 1 + + importConfig: + command: set + created_after: "2026-02-01" + apply: true + no_backup: true + parallelism: 1 + expectedMutations: 4 + + set-users-without-explicit-perms: + description: >- + --users-without-explicit-perms additively grants mapped repos only to + users who currently hold no explicit grants anywhere. Live, the + preflight asserts the named users hold no grants outside the involved + repos, which makes the selection deterministic. Also pins + --explicit-permissions-batch-size by forcing the smallest batch. + modes: + - local + - live + - performance + live: + usersWithoutOtherGrants: + - test_user_09961 + - test_user_09962 + cliCommand: >- + set --users-without-explicit-perms --apply --no-backup --parallelism 1 + --explicit-permissions-batch-size 1 + expectedMutations: 2 + + set-repos-without-explicit-perms: + description: >- + --repos-without-explicit-perms overwrites only repos that currently + have no explicit grants. Runs in the performance tier on the instance + because the mode requires a full before-snapshot (a 10k-user scan). + modes: + - local + - performance + cliCommand: set --repos-without-explicit-perms --apply --no-backup --parallelism 1 + expectedMutations: 1 + + set-repos-created-after: + description: >- + --repos-created-after scopes the overwrite to repos created on/after + the date. 
+ modes: + - local + cliCommand: set --repos-created-after 2026-02-01 --apply --no-backup --parallelism 1 + expectedMutations: 1 + + # ── Local + live: expected-error cases, replayed read-only on the instance ── + invalid-bad-regex: + description: >- + An invalid Python regex in a filter is rejected by structural validation + before any mutation. + modes: + - local + - live + cliCommand: set --full + expectedMutations: 0 + expectedErrors: + - is not a valid Python regex + + invalid-missing-repos-section: + description: >- + A rule without a repos section is rejected by structural validation + before any mutation. + modes: + - local + - live + cliCommand: set --full + expectedMutations: 0 + expectedErrors: + - "`repos:` section is missing" + + invalid-unknown-selector-field: + description: >- + A typo'd selector field is rejected by structural validation before any + mutation. + modes: + - local + - live + cliCommand: set --full + expectedMutations: 0 + expectedErrors: + - unknown users field 'userNames' + + invalid-set-created-after-date: + description: >- + An impossible calendar date passes the YYYY-MM-DD shape check but set + rejects it post-parse, before any mutation. + modes: + - local + - live + cliCommand: set --created-after 2026-02-31 + expectedMutations: 0 + expectedErrors: + - "--created-after must use YYYY-MM-DD" + + invalid-set-repos-created-after-date: + description: >- + An impossible calendar date passes the YYYY-MM-DD shape check but the + repo-scoped set rejects it post-parse, before any mutation. + modes: + - local + - live + cliCommand: set --repos-created-after 2026-02-31 + expectedMutations: 0 + expectedErrors: + - "--repos-created-after must use YYYY-MM-DD" + + restore-missing-file: + description: >- + restore with a snapshot path that does not exist fails without changing + any state. 
+ modes: + - local + - live + cliCommand: restore --restore-path definitely-missing-restore.json + expectedMutations: 0 + expectedErrors: + - "restore snapshot file does not exist" + + # ── Local + live: mutating cases, seeded and restored on the instance ── + no-match-noop: + description: >- + A rule matching no users produces zero mutations and leaves existing + grants untouched. + modes: + - local + - live + cliCommand: set --full --apply --no-backup --parallelism 1 + expectedMutations: 0 + + set-repos-filter: + description: >- + --repos scopes the full overwrite to the listed repos; other mapped + repos keep their existing grants. Also pins --src-log-level DEBUG: + maximum verbosity must not change the run's behavior. + modes: + - local + - live + - performance + cliCommand: >- + set --repos test-repo-49971 --apply --no-backup --parallelism 1 + --src-log-level DEBUG + expectedMutations: 1 + + add-users-preserves-existing: + description: >- + Additive --users mode grants mapped repos to one user without dropping + existing repo users. + modes: + - local + - live + cliCommand: set --users test_user_09992 --apply --no-backup --parallelism 1 + importConfig: + command: set + users: + - test_user_09992 + apply: true + no_backup: true + parallelism: 1 + expectedMutations: 2 + + full-overwrite-removes-stale-grant: + description: >- + Full set mode overwrites a mapped repo's explicit users, removing grants + that no rule justifies. + modes: + - local + - live + cliCommand: set --full --apply --no-backup --parallelism 1 + expectedMutations: 1 + + # ── Live only: real-instance validation and organization sync ── + invalid-created-after-date: + description: >- + An impossible calendar date passes the YYYY-MM-DD shape check but is + rejected by date validation. 
+ modes: + - live + cliCommand: get --created-after 2026-02-31 + expectedErrors: + - "--created-after must use YYYY-MM-DD" + + invalid-missing-maps-file: + description: >- + A maps path that does not exist is rejected with a pointer to the + command that creates the default maps file. Also pins the --quiet + alias: the error must reach the operator even at WARNING verbosity. + modes: + - live + cliCommand: set --full --maps-path definitely-missing-maps.yaml --quiet + expectedErrors: + - set input file does not exist + + get-created-after-future: + description: >- + A far-future --created-after selects no users on the real instance. + modes: + - live + cliCommand: get --created-after 2099-01-01 + expectedExitCode: 0 + expectedOutput: + - Selected 0 user(s) for get output. + + get-user-created-after-future: + description: >- + --users combined with a far-future --created-after filters the named + user out of the selection. + modes: + - live + cliCommand: get --users {user} --created-after 2099-01-01 + expectedExitCode: 0 + expectedOutput: + - no user metadata selected + + get-users-without-perms-created-after-future: + description: >- + --users-without-explicit-perms combined with a far-future + --created-after selects no users. + modes: + - live + cliCommand: get --users-without-explicit-perms --created-after 2099-01-01 + expectedExitCode: 0 + expectedOutput: + - Selected 0 user(s) for get output. + + get-repos-filter: + description: get scoped to one repo by exact name. + modes: + - live + cliCommand: get --repos test-repo-49981 + expectedExitCode: 0 + expectedOutput: + - Selected 1 repo(s) by exact name. + + get-repos-created-after-future: + description: >- + A far-future --repos-created-after selects no repos. Also pins the + --verbose alias: DEBUG verbosity must not hide the INFO summary. 
+ modes: + - live + cliCommand: get --repos-created-after 2099-01-01 --verbose + expectedExitCode: 0 + expectedOutput: + - Selected 0 Sourcegraph repo(s) created on or after 2099-01-01. + + set-users-created-after-noop: + description: >- + A far-future --created-after selects no users on the real instance: + zero mutations, seeded state untouched. + modes: + - live + - performance + cliCommand: set --created-after 2099-01-01 --apply --no-backup --parallelism 1 + expectedMutations: 0 + + set-repos-created-after-noop: + description: >- + A far-future --repos-created-after selects no repos on the real + instance: zero mutations, seeded state untouched. + modes: + - live + - performance + cliCommand: set --repos-created-after 2099-01-01 --apply --no-backup --parallelism 1 + expectedMutations: 0 + + sync-saml-orgs-dry-run: + description: >- + Standalone organization sync dry run. Also pins the explicit + --env-file flag against its default value. + modes: + - live + cliCommand: sync-saml-orgs --env-file .env + expectedExitCode: 0 + expectedOutput: + - Dry run complete + + set-users-sync-saml-orgs-dry-run: + description: >- + Combined permission + organization sync dispatch, user-scoped, dry run + only. + modes: + - live + cliCommand: set --users {user} --sync-saml-orgs + expectedExitCode: 0 + expectedOutput: + - Dry run complete + + sync-saml-orgs-apply: + description: >- + Org membership sync converges to SAML group data and validates its own + outcome; it is safe to re-run. + modes: + - live + cliCommand: sync-saml-orgs --apply + expectedExitCode: 0 + expectedOutput: + - "VALIDATION OK: all target org memberships match" + + # ── Live + performance: timed, measured runs (slowest) ── + full-overwrite-unions: + description: >- + Full set mode unions users across rules, overwrites mapped repos, and + leaves unmapped repos alone. 
+ modes: + - local + - live + - performance + cliCommand: set --full --apply --no-backup --parallelism 1 + importConfig: + command: set + full: true + apply: true + no_backup: true + parallelism: 1 + expectedMutations: 2 + + get-user-baseline: + description: Timed read-only baseline of one user's explicit grants. + modes: + - performance + cliCommand: get --users {user} + expectedExitCode: 0 \ No newline at end of file diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index c9c7270..143640a 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1 +1,11 @@ """Fast unit tests for pure helpers.""" + +import logging + +# Tests exercise failure paths that emit operator-facing WARNING/ERROR logs +# (e.g. "FAIL ..."). Without a handler, logging.lastResort prints them +# to stderr, where they masquerade as real failures in test-runner output. A +# log line saying FAIL must only ever mean a test actually failed. Installed +# here (not only in tests/__init__.py) because unittest discovery imports +# these subpackages as top-level packages, skipping the parent package. 
+logging.getLogger().addHandler(logging.NullHandler()) diff --git a/tests/unit/test_cli_config.py b/tests/unit/test_cli_config.py index ffca6b2..646c472 100644 --- a/tests/unit/test_cli_config.py +++ b/tests/unit/test_cli_config.py @@ -559,7 +559,7 @@ def test_run_fields_include_command_arguments_without_command_duplicates(self) - self.assertEqual(25, fields["explicit_permissions_batch_size"]) self.assertEqual(False, fields["fetch_sg_traces"]) self.assertEqual(False, fields["open_telemetry"]) - self.assertEqual(60.0, fields["http_timeout_seconds"]) + self.assertEqual(300.0, fields["http_timeout_seconds"]) def test_run_fields_omit_irrelevant_false_flags(self) -> None: configuration = make_config() diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index b8af9ce..710133e 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -135,6 +135,107 @@ def list_repo_ids( self.assertTrue(pending_counts) self.assertLessEqual(max(pending_counts), 4) + def test_capture_explicit_grants_aborts_when_circuit_breaker_opens(self) -> None: + users: list[permission_snapshot.SnapshotUser] = [ + {"id": f"user-{index}", "username": f"user-{index}"} for index in range(60) + ] + lookup_attempts: list[str] = [] + + def failing_batch_lookup( + _client: src.SourcegraphClient, + user_ids: Sequence[str], + *, + batch_size: int, + ) -> dict[str, list[str]]: + raise src.GraphQLError("HTTP request timed out") + + def failing_user_lookup(_client: src.SourcegraphClient, user_id: str) -> list[str]: + lookup_attempts.append(user_id) + raise src.GraphQLError("HTTP request timed out") + + with ( + patch.object( + permission_snapshot.permissions_sourcegraph, + "list_users_explicit_repo_ids", + side_effect=failing_batch_lookup, + ), + patch.object( + permission_snapshot.permissions_sourcegraph, + "list_user_explicit_repo_ids", + side_effect=failing_user_lookup, + ), + self.assertRaisesRegex(RuntimeError, "circuit breaker"), + ): + 
permission_snapshot.capture_explicit_grants( + cast(src.SourcegraphClient, object()), + users, + parallelism=1, + explicit_permissions_batch_size=1, + expected_user_count=len(users), + ) + + # The breaker must stop the capture early instead of grinding + # through every user's lookup + retries. + self.assertLess(len(lookup_attempts), len(users)) + + def test_capture_user_scoped_grants_tolerates_isolated_failures(self) -> None: + users: list[permission_snapshot.SnapshotUser] = [ + {"id": "user-1", "username": "test_user_09991"}, + {"id": "user-2", "username": "test_user_09992"}, + ] + + def user_lookup( + _client: src.SourcegraphClient, user_id: str + ) -> list[permission_types.Repository]: + if user_id == "user-1": + raise src.GraphQLError("transient failure") + return [{"id": src.encode_repository_id(1), "name": "test-repo-49981"}] + + with patch.object( + permission_snapshot.permissions_sourcegraph, + "list_user_explicit_repos", + side_effect=user_lookup, + ): + scoped_users = permission_snapshot.capture_user_scoped_explicit_grants( + cast(src.SourcegraphClient, object()), + users, + parallelism=1, + ) + + self.assertEqual([], scoped_users["test_user_09991"]["repos"]) + self.assertEqual( + ["test-repo-49981"], + [repo["name"] for repo in scoped_users["test_user_09992"]["repos"]], + ) + + def test_capture_user_scoped_grants_aborts_when_circuit_breaker_opens(self) -> None: + users: list[permission_snapshot.SnapshotUser] = [ + {"id": f"user-{index}", "username": f"user-{index}"} for index in range(60) + ] + lookup_attempts: list[str] = [] + + def failing_user_lookup( + _client: src.SourcegraphClient, user_id: str + ) -> list[permission_types.Repository]: + lookup_attempts.append(user_id) + raise src.GraphQLError("HTTP request timed out") + + with ( + patch.object( + permission_snapshot.permissions_sourcegraph, + "list_user_explicit_repos", + side_effect=failing_user_lookup, + ), + self.assertRaisesRegex(RuntimeError, "circuit breaker"), + ): + 
permission_snapshot.capture_user_scoped_explicit_grants( + cast(src.SourcegraphClient, object()), + users, + parallelism=1, + ) + + self.assertLess(len(lookup_attempts), len(users)) + def test_list_users_explicit_repos_batches_aliases_and_follows_pages(self) -> None: repo_one: permission_types.Repository = { "id": src.encode_repository_id(1), From c936d41893926aac144869cfc3baa3ab30d08e73 Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Wed, 10 Jun 2026 02:27:59 -0600 Subject: [PATCH 2/9] add more test cases --- .../add-users-by-email-and-list/after.json | 85 ++++++++ .../add-users-by-email-and-list/before.json | 80 +++++++ .../add-users-by-email-and-list/maps.yaml | 11 + .../e2e/fixtures/empty-maps-noop/before.json | 48 ++++ tests/e2e/fixtures/empty-maps-noop/maps.yaml | 1 + .../full-overwrite-dry-run/before.json | 90 ++++++++ .../fixtures/full-overwrite-dry-run/maps.yaml | 16 ++ .../full-overwrite-with-backup/after.json | 75 +++++++ .../full-overwrite-with-backup/before.json | 75 +++++++ .../full-overwrite-with-backup/maps.yaml | 9 + .../fixtures/get-full-snapshot/before.json | 48 ++++ .../before.json | 54 +++++ .../e2e/fixtures/get-user-grants/before.json | 48 ++++ .../before.json | 48 ++++ .../snapshot.json | 21 ++ .../invalid-set-unknown-repo/before.json | 46 ++++ .../invalid-set-unknown-repo/maps.yaml | 8 + .../invalid-set-unknown-user/before.json | 46 ++++ .../invalid-set-unknown-user/maps.yaml | 8 + .../match-provider-and-host-fields/after.json | 142 ++++++++++++ .../before.json | 136 ++++++++++++ .../match-provider-and-host-fields/maps.yaml | 13 ++ .../restore-applies-snapshot/after.json | 70 ++++++ .../restore-applies-snapshot/before.json | 70 ++++++ .../restore-applies-snapshot/snapshot.json | 30 +++ .../fixtures/restore-dry-run-noop/before.json | 70 ++++++ .../restore-dry-run-noop/snapshot.json | 30 +++ .../maps.yaml | 8 + .../set-full-sync-saml-orgs-dry-run/maps.yaml | 8 + .../maps.yaml | 8 + 
.../maps.yaml | 8 + .../maps.yaml | 8 + .../maps.yaml | 8 + tests/e2e/test_local_cases.py | 7 +- tests/tests.yaml | 206 +++++++++++++++++- 35 files changed, 1635 insertions(+), 4 deletions(-) create mode 100644 tests/e2e/fixtures/add-users-by-email-and-list/after.json create mode 100644 tests/e2e/fixtures/add-users-by-email-and-list/before.json create mode 100644 tests/e2e/fixtures/add-users-by-email-and-list/maps.yaml create mode 100644 tests/e2e/fixtures/empty-maps-noop/before.json create mode 100644 tests/e2e/fixtures/empty-maps-noop/maps.yaml create mode 100644 tests/e2e/fixtures/full-overwrite-dry-run/before.json create mode 100644 tests/e2e/fixtures/full-overwrite-dry-run/maps.yaml create mode 100644 tests/e2e/fixtures/full-overwrite-with-backup/after.json create mode 100644 tests/e2e/fixtures/full-overwrite-with-backup/before.json create mode 100644 tests/e2e/fixtures/full-overwrite-with-backup/maps.yaml create mode 100644 tests/e2e/fixtures/get-full-snapshot/before.json create mode 100644 tests/e2e/fixtures/get-repos-without-explicit-perms/before.json create mode 100644 tests/e2e/fixtures/get-user-grants/before.json create mode 100644 tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json create mode 100644 tests/e2e/fixtures/invalid-restore-wrong-schema-version/snapshot.json create mode 100644 tests/e2e/fixtures/invalid-set-unknown-repo/before.json create mode 100644 tests/e2e/fixtures/invalid-set-unknown-repo/maps.yaml create mode 100644 tests/e2e/fixtures/invalid-set-unknown-user/before.json create mode 100644 tests/e2e/fixtures/invalid-set-unknown-user/maps.yaml create mode 100644 tests/e2e/fixtures/match-provider-and-host-fields/after.json create mode 100644 tests/e2e/fixtures/match-provider-and-host-fields/before.json create mode 100644 tests/e2e/fixtures/match-provider-and-host-fields/maps.yaml create mode 100644 tests/e2e/fixtures/restore-applies-snapshot/after.json create mode 100644 
tests/e2e/fixtures/restore-applies-snapshot/before.json create mode 100644 tests/e2e/fixtures/restore-applies-snapshot/snapshot.json create mode 100644 tests/e2e/fixtures/restore-dry-run-noop/before.json create mode 100644 tests/e2e/fixtures/restore-dry-run-noop/snapshot.json create mode 100644 tests/e2e/fixtures/set-created-after-sync-saml-orgs-dry-run/maps.yaml create mode 100644 tests/e2e/fixtures/set-full-sync-saml-orgs-dry-run/maps.yaml create mode 100644 tests/e2e/fixtures/set-repos-created-after-sync-saml-orgs-dry-run/maps.yaml create mode 100644 tests/e2e/fixtures/set-repos-sync-saml-orgs-dry-run/maps.yaml create mode 100644 tests/e2e/fixtures/set-repos-without-perms-sync-saml-orgs-dry-run/maps.yaml create mode 100644 tests/e2e/fixtures/set-users-without-perms-sync-saml-orgs-dry-run/maps.yaml diff --git a/tests/e2e/fixtures/add-users-by-email-and-list/after.json b/tests/e2e/fixtures/add-users-by-email-and-list/after.json new file mode 100644 index 0000000..472e339 --- /dev/null +++ b/tests/e2e/fixtures/add-users-by-email-and-list/after.json @@ -0,0 +1,85 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { 
+ "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09993" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992", + "test_user_09993" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/add-users-by-email-and-list/before.json b/tests/e2e/fixtures/add-users-by-email-and-list/before.json new file mode 100644 index 0000000..44fb50f --- /dev/null +++ b/tests/e2e/fixtures/add-users-by-email-and-list/before.json @@ -0,0 +1,80 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": 
"marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09992" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/add-users-by-email-and-list/maps.yaml b/tests/e2e/fixtures/add-users-by-email-and-list/maps.yaml new file mode 100644 index 0000000..21e0be0 --- /dev/null +++ b/tests/e2e/fixtures/add-users-by-email-and-list/maps.yaml @@ -0,0 +1,11 @@ +maps: + - name: All three users may read both repos + users: + usernames: + - test_user_09991 + - test_user_09992 + - test_user_09993 + repos: + names: + - test-repo-49981 + - test-repo-49982 diff --git a/tests/e2e/fixtures/empty-maps-noop/before.json b/tests/e2e/fixtures/empty-maps-noop/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ b/tests/e2e/fixtures/empty-maps-noop/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/empty-maps-noop/maps.yaml 
b/tests/e2e/fixtures/empty-maps-noop/maps.yaml new file mode 100644 index 0000000..018b438 --- /dev/null +++ b/tests/e2e/fixtures/empty-maps-noop/maps.yaml @@ -0,0 +1 @@ +maps: [] diff --git a/tests/e2e/fixtures/full-overwrite-dry-run/before.json b/tests/e2e/fixtures/full-overwrite-dry-run/before.json new file mode 100644 index 0000000..d929f39 --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-dry-run/before.json @@ -0,0 +1,90 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09994", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09994-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09994" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09994" + ] + }, + { + "id": 103, + "name": "test-repo-49983", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + 
"test_user_09994" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/full-overwrite-dry-run/maps.yaml b/tests/e2e/fixtures/full-overwrite-dry-run/maps.yaml new file mode 100644 index 0000000..897196a --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-dry-run/maps.yaml @@ -0,0 +1,16 @@ +maps: + - name: test_user_09991 owns test-repo-49981 and test-repo-49982 + users: + usernames: + - test_user_09991 + repos: + names: + - test-repo-49981 + - test-repo-49982 + - name: test_user_09992 also owns test-repo-49982 + users: + usernames: + - test_user_09992 + repos: + names: + - test-repo-49982 diff --git a/tests/e2e/fixtures/full-overwrite-with-backup/after.json b/tests/e2e/fixtures/full-overwrite-with-backup/after.json new file mode 100644 index 0000000..5485d5b --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-with-backup/after.json @@ -0,0 +1,75 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + 
"verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/full-overwrite-with-backup/before.json b/tests/e2e/fixtures/full-overwrite-with-backup/before.json new file mode 100644 index 0000000..49449d7 --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-with-backup/before.json @@ -0,0 +1,75 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-02T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09993" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/full-overwrite-with-backup/maps.yaml 
b/tests/e2e/fixtures/full-overwrite-with-backup/maps.yaml new file mode 100644 index 0000000..4876aa1 --- /dev/null +++ b/tests/e2e/fixtures/full-overwrite-with-backup/maps.yaml @@ -0,0 +1,9 @@ +maps: + - name: test_user_09991 and test_user_09992 own test-repo-49981 + users: + usernames: + - test_user_09991 + - test_user_09992 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/get-full-snapshot/before.json b/tests/e2e/fixtures/get-full-snapshot/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ b/tests/e2e/fixtures/get-full-snapshot/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/get-repos-without-explicit-perms/before.json b/tests/e2e/fixtures/get-repos-without-explicit-perms/before.json new file mode 100644 index 0000000..da46d6d --- /dev/null +++ b/tests/e2e/fixtures/get-repos-without-explicit-perms/before.json @@ -0,0 +1,54 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + 
"id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/get-user-grants/before.json b/tests/e2e/fixtures/get-user-grants/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ b/tests/e2e/fixtures/get-user-grants/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json b/tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json new file mode 100644 index 0000000..41e4e90 --- /dev/null +++ 
b/tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json @@ -0,0 +1,48 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-restore-wrong-schema-version/snapshot.json b/tests/e2e/fixtures/invalid-restore-wrong-schema-version/snapshot.json new file mode 100644 index 0000000..0378da2 --- /dev/null +++ b/tests/e2e/fixtures/invalid-restore-wrong-schema-version/snapshot.json @@ -0,0 +1,21 @@ +{ + "schema_version": 1, + "captured_at": "2026-06-01T00:00:00+00:00", + "endpoint": "https://fixture.sourcegraph.test", + "bindID_mode": "USERNAME", + "config_file": null, + "config_sha256": null, + "pending_bindIDs": [], + "stats": { + "total_users_scanned": 0, + "users_with_explicit_grants": 0, + "repos_with_explicit_grants": 0, + "total_grants": 0 + }, + "repos": { + "101": { + "name": "test-repo-49981", + "users": [] + } + } +} diff --git a/tests/e2e/fixtures/invalid-set-unknown-repo/before.json b/tests/e2e/fixtures/invalid-set-unknown-repo/before.json new file mode 100644 index 0000000..568cbbe --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-unknown-repo/before.json @@ -0,0 +1,46 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": 
[ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-set-unknown-repo/maps.yaml b/tests/e2e/fixtures/invalid-set-unknown-repo/maps.yaml new file mode 100644 index 0000000..ef82b8c --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-unknown-repo/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Valid rule that must never run because the CLI filter is rejected + users: + usernames: + - test_user_09991 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/invalid-set-unknown-user/before.json b/tests/e2e/fixtures/invalid-set-unknown-user/before.json new file mode 100644 index 0000000..568cbbe --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-unknown-user/before.json @@ -0,0 +1,46 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": 
"marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/invalid-set-unknown-user/maps.yaml b/tests/e2e/fixtures/invalid-set-unknown-user/maps.yaml new file mode 100644 index 0000000..ef82b8c --- /dev/null +++ b/tests/e2e/fixtures/invalid-set-unknown-user/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Valid rule that must never run because the CLI filter is rejected + users: + usernames: + - test_user_09991 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/match-provider-and-host-fields/after.json b/tests/e2e/fixtures/match-provider-and-host-fields/after.json new file mode 100644 index 0000000..edf6cb3 --- /dev/null +++ b/tests/e2e/fixtures/match-provider-and-host-fields/after.json @@ -0,0 +1,142 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + }, + { + "id": 2, + "kind": "BITBUCKETSERVER", + "displayName": "Bitbucket LOB1", + "url": "https://bitbucket.test/", + "config": "{\"username\": \"LOB1-SA1\"}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ 
+ { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + }, + { + "Value": "lob1" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + }, + { + "id": 201, + "name": "test-repo-49982", + "externalServiceID": 2, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + }, + { + "id": 202, + "name": "test-repo-49983", + "externalServiceID": 2, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/match-provider-and-host-fields/before.json b/tests/e2e/fixtures/match-provider-and-host-fields/before.json new file mode 100644 index 0000000..4fd2193 --- /dev/null +++ b/tests/e2e/fixtures/match-provider-and-host-fields/before.json @@ -0,0 +1,136 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", 
+ "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + }, + { + "id": 2, + "kind": "BITBUCKETSERVER", + "displayName": "Bitbucket LOB1", + "url": "https://bitbucket.test/", + "config": "{\"username\": \"LOB1-SA1\"}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + }, + { + "Value": "lob1" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + 
"explicitPermissionsUsers": [] + }, + { + "id": 201, + "name": "test-repo-49982", + "externalServiceID": 2, + "explicitPermissionsUsers": [] + }, + { + "id": 202, + "name": "test-repo-49983", + "externalServiceID": 2, + "explicitPermissionsUsers": [] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/match-provider-and-host-fields/maps.yaml b/tests/e2e/fixtures/match-provider-and-host-fields/maps.yaml new file mode 100644 index 0000000..d6d5e1f --- /dev/null +++ b/tests/e2e/fixtures/match-provider-and-host-fields/maps.yaml @@ -0,0 +1,13 @@ +maps: + - name: Okta account holders get the LOB1 Bitbucket repos + users: + authProvider: + type: saml + serviceID: http://www.okta.com/test123 + clientID: https://sourcegraph.test/.auth/saml/metadata + displayName: Okta + repos: + codeHostConnection: + url: https://bitbucket.test/ + displayName: Bitbucket LOB1 + username: LOB1-SA1 diff --git a/tests/e2e/fixtures/restore-applies-snapshot/after.json b/tests/e2e/fixtures/restore-applies-snapshot/after.json new file mode 100644 index 0000000..015e930 --- /dev/null +++ b/tests/e2e/fixtures/restore-applies-snapshot/after.json @@ -0,0 +1,70 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": 
"marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09992" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/restore-applies-snapshot/before.json b/tests/e2e/fixtures/restore-applies-snapshot/before.json new file mode 100644 index 0000000..71dda29 --- /dev/null +++ b/tests/e2e/fixtures/restore-applies-snapshot/before.json @@ -0,0 +1,70 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + } + ], + "pendingBindIDs": [] +} diff --git 
a/tests/e2e/fixtures/restore-applies-snapshot/snapshot.json b/tests/e2e/fixtures/restore-applies-snapshot/snapshot.json new file mode 100644 index 0000000..3de8967 --- /dev/null +++ b/tests/e2e/fixtures/restore-applies-snapshot/snapshot.json @@ -0,0 +1,30 @@ +{ + "schema_version": 5, + "captured_at": "2026-06-01T00:00:00+00:00", + "endpoint": "https://fixture.sourcegraph.test", + "bindID_mode": "USERNAME", + "config_file": null, + "config_sha256": null, + "pending_bindIDs": [], + "stats": { + "total_users_scanned": 0, + "users_with_explicit_grants": 0, + "repos_with_explicit_grants": 2, + "total_grants": 3 + }, + "repos": { + "101": { + "name": "test-repo-49981", + "users": [ + "test_user_09992" + ] + }, + "102": { + "name": "test-repo-49982", + "users": [ + "test_user_09991", + "test_user_09992" + ] + } + } +} diff --git a/tests/e2e/fixtures/restore-dry-run-noop/before.json b/tests/e2e/fixtures/restore-dry-run-noop/before.json new file mode 100644 index 0000000..71dda29 --- /dev/null +++ b/tests/e2e/fixtures/restore-dry-run-noop/before.json @@ -0,0 +1,70 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + } + ], + 
"repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + }, + { + "id": 102, + "name": "test-repo-49982", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ] + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/restore-dry-run-noop/snapshot.json b/tests/e2e/fixtures/restore-dry-run-noop/snapshot.json new file mode 100644 index 0000000..3de8967 --- /dev/null +++ b/tests/e2e/fixtures/restore-dry-run-noop/snapshot.json @@ -0,0 +1,30 @@ +{ + "schema_version": 5, + "captured_at": "2026-06-01T00:00:00+00:00", + "endpoint": "https://fixture.sourcegraph.test", + "bindID_mode": "USERNAME", + "config_file": null, + "config_sha256": null, + "pending_bindIDs": [], + "stats": { + "total_users_scanned": 0, + "users_with_explicit_grants": 0, + "repos_with_explicit_grants": 2, + "total_grants": 3 + }, + "repos": { + "101": { + "name": "test-repo-49981", + "users": [ + "test_user_09992" + ] + }, + "102": { + "name": "test-repo-49982", + "users": [ + "test_user_09991", + "test_user_09992" + ] + } + } +} diff --git a/tests/e2e/fixtures/set-created-after-sync-saml-orgs-dry-run/maps.yaml b/tests/e2e/fixtures/set-created-after-sync-saml-orgs-dry-run/maps.yaml new file mode 100644 index 0000000..9873788 --- /dev/null +++ b/tests/e2e/fixtures/set-created-after-sync-saml-orgs-dry-run/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Everyone may read the canary repo + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/set-full-sync-saml-orgs-dry-run/maps.yaml b/tests/e2e/fixtures/set-full-sync-saml-orgs-dry-run/maps.yaml new file mode 100644 index 0000000..9873788 --- /dev/null +++ b/tests/e2e/fixtures/set-full-sync-saml-orgs-dry-run/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Everyone may read the canary repo + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 diff --git 
a/tests/e2e/fixtures/set-repos-created-after-sync-saml-orgs-dry-run/maps.yaml b/tests/e2e/fixtures/set-repos-created-after-sync-saml-orgs-dry-run/maps.yaml new file mode 100644 index 0000000..9873788 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-created-after-sync-saml-orgs-dry-run/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Everyone may read the canary repo + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/set-repos-sync-saml-orgs-dry-run/maps.yaml b/tests/e2e/fixtures/set-repos-sync-saml-orgs-dry-run/maps.yaml new file mode 100644 index 0000000..9873788 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-sync-saml-orgs-dry-run/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Everyone may read the canary repo + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/set-repos-without-perms-sync-saml-orgs-dry-run/maps.yaml b/tests/e2e/fixtures/set-repos-without-perms-sync-saml-orgs-dry-run/maps.yaml new file mode 100644 index 0000000..9873788 --- /dev/null +++ b/tests/e2e/fixtures/set-repos-without-perms-sync-saml-orgs-dry-run/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Everyone may read the canary repo + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/set-users-without-perms-sync-saml-orgs-dry-run/maps.yaml b/tests/e2e/fixtures/set-users-without-perms-sync-saml-orgs-dry-run/maps.yaml new file mode 100644 index 0000000..9873788 --- /dev/null +++ b/tests/e2e/fixtures/set-users-without-perms-sync-saml-orgs-dry-run/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Everyone may read the canary repo + users: + usernameRegexes: + - '.*' + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/test_local_cases.py b/tests/e2e/test_local_cases.py index 1bf3afd..92486ea 100644 --- a/tests/e2e/test_local_cases.py +++ b/tests/e2e/test_local_cases.py @@ -56,12 +56,13 @@ def test_registry_cases_are_runnable(self) -> None: "{user} 
resolves to the live --user; local mode cannot use it", ) argv = shlex.split(cli_command) - if argv[:1] == ["restore"]: + if argv[:1] == ["restore"] and {"live", "performance"} & set(case_modes(case)): self.assertNotIn( "--apply", argv, - "registry cases must not run a bare restore --apply; restores " - "are managed by the seeded set-apply cycle", + "instance-mode registry cases must not run a bare restore --apply; " + "live restores are managed by the seeded set-apply cycle " + "(local-only cases may restore --apply against the fake)", ) def test_local_replay_cases(self) -> None: diff --git a/tests/tests.yaml b/tests/tests.yaml index 13cb5c0..c18cd35 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -392,6 +392,86 @@ cases: cliCommand: set --repos-created-after 2026-02-01 --apply --no-backup --parallelism 1 expectedMutations: 1 + full-overwrite-dry-run: + description: >- + set without --apply plans the full overwrite but makes zero mutations + and leaves state untouched — the dry-run default never mutates. + modes: + - local + cliCommand: set --full --no-backup --parallelism 1 + expectedMutations: 0 + + empty-maps-noop: + description: >- + An empty maps list is a no-op, not an error: zero mutations, existing + grants untouched. + modes: + - local + cliCommand: set --full --apply --no-backup --parallelism 1 + expectedMutations: 0 + + match-provider-and-host-fields: + description: >- + Users matched by authProvider type/serviceID/clientID/displayName + (without configID or samlGroup) and repos matched by codeHostConnection + url/displayName/username get grants; the builtin-only user does not. + modes: + - local + cliCommand: set --full --apply --no-backup --parallelism 1 + expectedMutations: 2 + + add-users-by-email-and-list: + description: >- + --users accepts a comma-delimited mix of an email address and a + username; only the two selected users gain grants, additively. 
+ modes: + - local + cliCommand: >- + set --users + marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com,test_user_09993 + --apply --no-backup --parallelism 1 + expectedMutations: 4 + + restore-dry-run-noop: + description: >- + restore without --apply plans against a snapshot that differs from + current state but makes zero mutations. + modes: + - local + cliCommand: >- + restore --restore-path tests/e2e/fixtures/restore-dry-run-noop/snapshot.json + --no-backup --parallelism 1 + expectedMutations: 0 + + restore-applies-snapshot: + description: >- + restore --apply overwrites the repo that differs from the snapshot and + skips the repo that already matches. + modes: + - local + cliCommand: >- + restore --restore-path tests/e2e/fixtures/restore-applies-snapshot/snapshot.json + --apply --no-backup --parallelism 1 + expectedMutations: 1 + + full-overwrite-with-backup: + description: >- + The default backup path (no --no-backup) captures before/after + snapshots and validates the apply without changing the outcome. + modes: + - local + cliCommand: set --full --apply --parallelism 1 + expectedMutations: 1 + + get-user-grants: + description: >- + get scoped to one user captures that user's explicit grants and never + mutates the instance. + modes: + - local + cliCommand: get --users test_user_09991 + expectedMutations: 0 + # ── Local + live: expected-error cases, replayed read-only on the instance ── invalid-bad-regex: description: >- @@ -453,6 +533,41 @@ cases: expectedErrors: - "--repos-created-after must use YYYY-MM-DD" + invalid-set-unknown-user: + description: >- + A --users value naming no Sourcegraph user fails before any mutation. + modes: + - local + - live + cliCommand: set --users username_doesnt_exist_01 + expectedMutations: 0 + expectedErrors: + - "No Sourcegraph user found for 'username_doesnt_exist_01'" + + invalid-set-unknown-repo: + description: >- + A --repos value naming no Sourcegraph repo fails before any mutation. 
+ modes: + - local + - live + cliCommand: set --repos repo-doesnt-exist-49999 + expectedMutations: 0 + expectedErrors: + - "No Sourcegraph repo found for: repo-doesnt-exist-49999" + + invalid-restore-wrong-schema-version: + description: >- + A snapshot with an unsupported schema_version is refused without + changing any state. + modes: + - local + cliCommand: >- + restore --restore-path + tests/e2e/fixtures/invalid-restore-wrong-schema-version/snapshot.json + expectedMutations: 0 + expectedErrors: + - "snapshot schema_version is 1, expected" + restore-missing-file: description: >- restore with a snapshot path that does not exist fails without changing @@ -633,6 +748,61 @@ cases: expectedOutput: - Dry run complete + set-full-sync-saml-orgs-dry-run: + description: >- + Combined permission + organization sync dispatch, full mode, dry run + only. + modes: + - live + cliCommand: set --full --sync-saml-orgs + expectedExitCode: 0 + expectedOutput: + - Dry run complete + + set-created-after-sync-saml-orgs-dry-run: + description: >- + Combined permission + organization sync dispatch, created-after mode + (far-future date selects no users), dry run only. + modes: + - live + cliCommand: set --created-after 2099-01-01 --sync-saml-orgs + expectedExitCode: 0 + expectedOutput: + - Dry run complete + + set-repos-sync-saml-orgs-dry-run: + description: >- + Combined permission + organization sync dispatch, repo-scoped, dry run + only. + modes: + - live + cliCommand: set --repos test-repo-49981 --sync-saml-orgs + expectedExitCode: 0 + expectedOutput: + - Dry run complete + + set-users-without-perms-sync-saml-orgs-dry-run: + description: >- + Combined permission + organization sync dispatch, + users-without-explicit-perms mode, dry run only. 
+ modes: + - live + cliCommand: set --users-without-explicit-perms --sync-saml-orgs + expectedExitCode: 0 + expectedOutput: + - Dry run complete + + set-repos-created-after-sync-saml-orgs-dry-run: + description: >- + Combined permission + organization sync dispatch, repos-created-after + mode (far-future date selects no repos), dry run only. + modes: + - live + cliCommand: set --repos-created-after 2099-01-01 --sync-saml-orgs + expectedExitCode: 0 + expectedOutput: + - Dry run complete + sync-saml-orgs-apply: description: >- Org membership sync converges to SAML group data and validates its own @@ -667,4 +837,38 @@ cases: modes: - performance cliCommand: get --users {user} - expectedExitCode: 0 \ No newline at end of file + expectedExitCode: 0 + + get-full-snapshot: + description: >- + Bare get captures the full instance snapshot. Locally it proves get + never mutates; on the instance it is the timed full 10k-user capture, + which is why it runs in the performance tier rather than live. + modes: + - local + - performance + cliCommand: get + expectedMutations: 0 + + get-repos-without-explicit-perms: + description: >- + get --repos-without-explicit-perms scopes the snapshot to repos with + no explicit grants. Requires the full before-snapshot, so on the + instance it runs in the performance tier. + modes: + - local + - performance + cliCommand: get --repos-without-explicit-perms + expectedMutations: 0 + + set-repos-without-perms-sync-saml-orgs-dry-run: + description: >- + Combined permission + organization sync dispatch, + repos-without-explicit-perms mode, dry run only. Needs the full + before-snapshot, so it runs in the performance tier. 
+ modes: + - performance + cliCommand: set --repos-without-explicit-perms --sync-saml-orgs + expectedExitCode: 0 + expectedOutput: + - Dry run complete \ No newline at end of file From 8140af4b18a4ef3f96093cac8f430ccc66fd7653 Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Wed, 10 Jun 2026 03:17:51 -0600 Subject: [PATCH 3/9] delete install script --- dev/test-cli-pypi-install.sh | 50 ------------------------------------ 1 file changed, 50 deletions(-) delete mode 100755 dev/test-cli-pypi-install.sh diff --git a/dev/test-cli-pypi-install.sh b/dev/test-cli-pypi-install.sh deleted file mode 100755 index fa1cb4e..0000000 --- a/dev/test-cli-pypi-install.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -# Description: Tests CLI mode install - -set -euox pipefail - -# Set the working directory -tmp_root="${TMPDIR:-/tmp}" -working_directory="${tmp_root%/}/src-auth-perms-sync-pypi-install" - -# Delete, recreate, and cd to working directory -rm -rf "${working_directory}" && mkdir -p "${working_directory}" && cd "${working_directory}" - -log_file="${working_directory}/test-cli-pypi-install.log" -exec > >(tee "${log_file}") 2>&1 -echo "Writing output to ${log_file}" -echo "" -echo "Dir contents in ${working_directory} before" -ls -al - -# Use python3.13 to create and activate a venv -# shellcheck disable=SC1091 -echo "" -python3.13 -m venv .venv && source .venv/bin/activate -which python -python --version - -# Ensure pip is up to date -echo "" -python -m pip install --upgrade pip - -# pip install latest from https://pypi.org/project/src-auth-perms-sync -echo "" -python -m pip install src-auth-perms-sync - -# Run commands -echo "" -src-auth-perms-sync --help -echo "" -src-auth-perms-sync get --help -echo "" -src-auth-perms-sync set --help -echo "" -src-auth-perms-sync restore --help -echo "" -src-auth-perms-sync sync-saml-orgs --help - -echo "" -echo "Dir contents in ${working_directory} after" -ls -al -echo "" From 
74b045f8a114b103ec833d3922ce0d31629104a6 Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Wed, 10 Jun 2026 03:23:52 -0600 Subject: [PATCH 4/9] add install test, fix perf tests --- .../permissions/sourcegraph.py | 24 +++++- tests/README.md | 14 ++- tests/run.py | 85 ++++++++++++++++++- tests/unit/test_permissions_sourcegraph.py | 36 ++++++++ 4 files changed, 153 insertions(+), 6 deletions(-) diff --git a/src/src_auth_perms_sync/permissions/sourcegraph.py b/src/src_auth_perms_sync/permissions/sourcegraph.py index 5c8edb3..89131fa 100644 --- a/src/src_auth_perms_sync/permissions/sourcegraph.py +++ b/src/src_auth_perms_sync/permissions/sourcegraph.py @@ -300,24 +300,40 @@ def list_site_user_candidates_without_explicit_repos( return SiteUserCandidateSelection(candidates=[], explicit_user_count=0) if len(first_page) >= total_count or parallelism <= 1: - _log_user_candidate_load_progress(len(first_page), total_count, started) + # Sequential path: still page through ALL candidates. If the server + # caps `nodes(limit:)` below our requested page size, use the + # observed first-page width so offset steps do not skip rows. 
+ sequential_pages: list[tuple[int, list[shared_types.SiteUserCandidate]]] = [(0, first_page)] + observed_page_size = len(first_page) + for offset in range(observed_page_size, total_count, observed_page_size): + nodes, _ = _site_user_candidate_page( + client, + created_filter, + offset=offset, + page_size=SITE_USER_CANDIDATE_PAGE_SIZE, + ) + sequential_pages.append((offset, nodes)) + sequential_candidates = _dedupe_site_user_candidate_pages(sequential_pages) + _log_user_candidate_load_progress(len(sequential_candidates), total_count, started) log.info( "Checking %d active user candidate(s)%s for existing explicit repo permissions " "in batches of %d ...", - len(first_page), + len(sequential_candidates), created_filter_label, batch_size, ) explicit_user_ids = user_ids_with_explicit_repos( client, - [candidate["id"] for candidate in first_page], + [candidate["id"] for candidate in sequential_candidates], batch_size=batch_size, parallelism=parallelism, worker_pool=worker_pool, ) return SiteUserCandidateSelection( candidates=[ - candidate for candidate in first_page if candidate["id"] not in explicit_user_ids + candidate + for candidate in sequential_candidates + if candidate["id"] not in explicit_user_ids ], explicit_user_count=len(explicit_user_ids), ) diff --git a/tests/README.md b/tests/README.md index 60f2a52..40dd288 100644 --- a/tests/README.md +++ b/tests/README.md @@ -4,7 +4,7 @@ All testing is driven by one entrypoint and one case registry: | Path | Purpose | | ---- | ------- | -| [run.py](./run.py) | The single test entrypoint: `uv run tests/run.py [--local \| --live \| --performance]` | +| [run.py](./run.py) | The single test entrypoint: `uv run tests/run.py [--local \| --live \| --performance \| --install]` | | [tests.yaml](./tests.yaml) | The case registry: what each case runs, where, and what it must produce (see its header comment for the full schema) | | [e2e/fixtures/](./e2e/fixtures/) | Per-case state files, in a directory matching the case name | | 
[e2e/case_runner.py](./e2e/case_runner.py) | The case execution engine: registry loader, in-memory Sourcegraph instance (`FakeSourcegraphClient`), full-command runs for state cases, in-process parser replays for replay cases | @@ -68,6 +68,18 @@ files (e.g. `test_user_09991`, `test-repo-49981`), and exact selectors only - **performance** — same as live, but timed and measured (traces, RSS sampling, TSV row). +## PyPI install smoke (`--install`) + +`uv run tests/run.py --install` pip-installs the **published** package into a +clean venv (`--install-python`, default `python3.13`) and runs every `--help` +command, asserting exit 0 and usage output. It needs network to pypi.org +only — no Sourcegraph instance. `--install-package` pins a version +(`src-auth-perms-sync==1.2.3`) or points at a wheel path. This complements +the live tier's "wheel install smoke", which builds and installs the +*local* wheel; CI separately installs the locally-built wheel in +validate.yml. Use `--install` after a release to verify the artifact +operators actually download. + ## Workflow for adding or editing a case 1. Register the case in [tests.yaml](./tests.yaml); create the fixture diff --git a/tests/run.py b/tests/run.py index 8a3a51c..a1c5fc5 100644 --- a/tests/run.py +++ b/tests/run.py @@ -13,6 +13,9 @@ --performance Repeated timed runs of the expensive paths against the test instance, with Sourcegraph trace retention and resource sampling, reported as a TSV and median summary. + --install PyPI install smoke test: pip-install the published package + (default: latest src-auth-perms-sync) into a clean venv and + run every --help command. Needs network to pypi.org only. --live and --performance optionally take a comma-delimited list of test names (substring match) to run a subset, e.g. --live full-overwrite-unions. 
@@ -29,6 +32,7 @@ uv run tests/run.py uv run tests/run.py --live uv run tests/run.py --performance --repeat 3 + uv run tests/run.py --install uv run tests/run.py --update-golden """ @@ -195,7 +199,7 @@ def configure_logging(log_file: Path, quiet: bool = False) -> None: class TestArguments: """Parsed command-line options for this test run.""" - level: str # "local" | "live" | "performance" + level: str # "local" | "live" | "performance" | "install" test_filter: tuple[str, ...] # empty = run everything in the level quiet: bool update_golden: bool @@ -219,6 +223,8 @@ class TestArguments: monitor_interval_seconds: int monitor_postgres_interval_seconds: int monitor_statements_interval_seconds: int + install_python: str + install_package: str def parse_arguments(argv: Sequence[str] | None = None) -> TestArguments: @@ -249,6 +255,12 @@ def parse_arguments(argv: Sequence[str] | None = None) -> TestArguments: help="Repeated timed runs against the .env instance with traces and resource " "sampling. Optionally pass a comma-delimited list of test names (substring match)", ) + level_group.add_argument( + "--install", + action="store_true", + help="PyPI install smoke test: pip-install the published package into a " + "clean venv and run every --help command. 
Needs network to pypi.org only", + ) parser.add_argument( "-q", "--quiet", @@ -349,6 +361,18 @@ def parse_arguments(argv: Sequence[str] | None = None) -> TestArguments: monitor_group.add_argument("--monitor-interval-seconds", type=int, default=5) monitor_group.add_argument("--monitor-postgres-interval-seconds", type=int, default=10) monitor_group.add_argument("--monitor-statements-interval-seconds", type=int, default=30) + install_group = parser.add_argument_group("install smoke test") + install_group.add_argument( + "--install-python", + default="python3.13", + help="Python interpreter used to create the clean venv (default: python3.13)", + ) + install_group.add_argument( + "--install-package", + default="src-auth-perms-sync", + help="pip requirement to install, e.g. 'src-auth-perms-sync==1.2.3' or a " + "wheel path (default: src-auth-perms-sync, the latest from PyPI)", + ) options = parser.parse_args(argv) level = "local" test_filter: tuple[str, ...] = () @@ -358,6 +382,8 @@ def parse_arguments(argv: Sequence[str] | None = None) -> TestArguments: if options.performance is not None: level = "performance" test_filter = parse_test_filter(cast(str, options.performance)) + if options.install: + level = "install" return TestArguments( level=level, test_filter=test_filter, @@ -385,6 +411,8 @@ def parse_arguments(argv: Sequence[str] | None = None) -> TestArguments: monitor_interval_seconds=int(options.monitor_interval_seconds), monitor_postgres_interval_seconds=int(options.monitor_postgres_interval_seconds), monitor_statements_interval_seconds=int(options.monitor_statements_interval_seconds), + install_python=str(options.install_python), + install_package=str(options.install_package), ) @@ -1172,6 +1200,59 @@ def run_wheel_install_smoke(self) -> None: return self.record("wheel install smoke", "live", True, time.monotonic() - started) + # -- install smoke (--install) ------------------------------------------------- + + def run_install(self) -> None: + """PyPI install 
smoke: pip-install the published package into a clean + venv and run every --help command. Replaces dev/test-cli-pypi-install.sh.""" + python = self.arguments.install_python + package = self.arguments.install_package + log.info("\n=== Install smoke: %s via %s ===", package, python) + with tempfile.TemporaryDirectory(prefix="src-auth-perms-sync-pypi-install-") as temporary: + venv_directory = Path(temporary) / "venv" + venv_python = venv_directory / "bin" / "python" + cli_path = venv_directory / "bin" / "src-auth-perms-sync" + setup_steps: tuple[tuple[str, list[str]], ...] = ( + ( + f"install: create venv ({python})", + [python, "-m", "venv", str(venv_directory)], + ), + ( + "install: upgrade pip", + [str(venv_python), "-m", "pip", "install", "--quiet", "--upgrade", "pip"], + ), + ( + # Not --quiet: the log must show which version was resolved. + f"install: pip install {package}", + [str(venv_python), "-m", "pip", "install", package], + ), + ) + for name, command in setup_steps: + started = time.monotonic() + execution = self.stream_command(command) + passed = execution.return_code == 0 + self.record( + name, + "install", + passed, + time.monotonic() - started, + "" if passed else f"exit {execution.return_code}", + ) + if not passed: + return + for help_arguments in ((), ("get",), ("set",), ("restore",), ("sync-saml-orgs",)): + name = "install: src-auth-perms-sync " + " ".join((*help_arguments, "--help")) + started = time.monotonic() + execution = self.stream_command([str(cli_path), *help_arguments, "--help"]) + usage_shown = "usage: src-auth-perms-sync" in execution.output + passed = execution.return_code == 0 and usage_shown + detail = "" + if execution.return_code != 0: + detail = f"exit {execution.return_code}" + elif not usage_shown: + detail = "usage text missing from --help output" + self.record(name, "install", passed, time.monotonic() - started, detail) + def run_live_fixture_cases(self, environment: dict[str, str]) -> None: log.info("\n--- Live: tests.yaml 
cases against the real instance ---") for case_name, case in self.fixture_cases_for_mode("live"): @@ -2860,6 +2941,8 @@ def main() -> None: suite.run_property_checks() elif arguments.level == "live": suite.run_live() + elif arguments.level == "install": + suite.run_install() else: suite.run_performance() diff --git a/tests/unit/test_permissions_sourcegraph.py b/tests/unit/test_permissions_sourcegraph.py index 0ec3ae8..94d5dda 100644 --- a/tests/unit/test_permissions_sourcegraph.py +++ b/tests/unit/test_permissions_sourcegraph.py @@ -239,6 +239,42 @@ def test_user_ids_with_explicit_repos_batches_existence_checks(self) -> None: self.assertNotIn("first", call) self.assertFalse(any(variable_name.startswith("after") for variable_name in call)) + def test_candidates_without_explicit_repos_pages_past_first_page_sequentially(self) -> None: + # Regression: with parallelism=1 and more users than one page, the + # selection used to silently consider ONLY the first page (1000 + # users) — every later user was excluded from candidates. 
+ site_users = _SiteUsersClient(total_count=2500) + explicit_repos = _ExplicitReposClient({"user-1500"}) + + class _CombinedClient: + def graphql( + self, + query: str, + variables: src.JSONDict | None = None, + *, + follow_pages: bool = True, + ) -> src.JSONDict: + if "query SiteUsers" in query: + return site_users.graphql(query, variables, follow_pages=follow_pages) + if "query UserExplicitRepoExistsBatch" in query: + return explicit_repos.graphql(query, variables, follow_pages=follow_pages) + raise AssertionError(f"unexpected query: {query[:80]}") + + selection = permissions_sourcegraph.list_site_user_candidates_without_explicit_repos( + cast(src.SourcegraphClient, _CombinedClient()), + None, + batch_size=1000, + parallelism=1, + ) + + self.assertEqual(selection.explicit_user_count, 1) + self.assertEqual(len(selection.candidates), 2499) + candidate_ids = {candidate["id"] for candidate in selection.candidates} + self.assertIn("user-2499", candidate_ids) + self.assertNotIn("user-1500", candidate_ids) + _, offsets = _site_users_call_page_args(site_users.calls) + self.assertEqual(sorted(offsets), [0, 1000, 2000]) + def test_candidates_without_explicit_repos_pipelines_checks_after_first_page(self) -> None: client = _PipelinedCandidateClient() From ff3e2c5f6c260f02c663a177a7c44da5e63d0901 Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Wed, 10 Jun 2026 03:33:15 -0600 Subject: [PATCH 5/9] remove --parallelism 1 --- tests/e2e/case_runner.py | 11 +++++++--- tests/tests.yaml | 47 ++++++++++++++++++++-------------------- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/tests/e2e/case_runner.py b/tests/e2e/case_runner.py index 7315f23..46cf575 100644 --- a/tests/e2e/case_runner.py +++ b/tests/e2e/case_runner.py @@ -686,10 +686,15 @@ def run_fixture_case(case_name: str, runner: str = "cli") -> FixtureRunResult: try: cli_input = cli_input_for_case(case, case_name, client.endpoint, runner) - command = 
cli.resolve_command(cli_input.command_name, cli_input.config) - with ThreadPoolExecutor(max_workers=cli_input.config.parallelism) as worker_pool: + # Local runs execute in-process against the in-memory fake, where + # client parallelism buys nothing and only adds scheduling + # nondeterminism — pin it to 1 regardless of the case's command + # line. Live/performance runs use the command line as written. + local_config = cli_input.config.model_copy(update={"parallelism": 1}) + command = cli.resolve_command(cli_input.command_name, local_config) + with ThreadPoolExecutor(max_workers=local_config.parallelism) as worker_pool: cli.run_command( - cli_input.config, + local_config, command, cast(src.SourcegraphClient, client), worker_pool, diff --git a/tests/tests.yaml b/tests/tests.yaml index c18cd35..f6bb836 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -298,7 +298,7 @@ cases: but only test_user_09991 matches the email filter. modes: - local - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup expectedMutations: 1 regex-filters-scope: @@ -311,7 +311,7 @@ cases: - local - live - performance - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup live: involvedRepos: - test-repo-49980 @@ -332,7 +332,7 @@ cases: SAML assertion includes the group. 
modes: - local - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup expectedMutations: 2 set-users-created-after: @@ -342,14 +342,13 @@ cases: modes: - local cliCommand: >- - set --created-after 2026-02-01 --apply --no-backup --parallelism 1 + set --created-after 2026-02-01 --apply --no-backup importConfig: command: set created_after: "2026-02-01" apply: true no_backup: true - parallelism: 1 expectedMutations: 4 set-users-without-explicit-perms: @@ -367,6 +366,10 @@ cases: usersWithoutOtherGrants: - test_user_09961 - test_user_09962 + # Deliberately --parallelism 1: keeps live coverage of the SEQUENTIAL + # candidate-selection code path, where the 2026-06-10 first-page-only + # truncation bug lived. Every other case uses the CLI default so the + # suite exercises what production runs actually use. cliCommand: >- set --users-without-explicit-perms --apply --no-backup --parallelism 1 --explicit-permissions-batch-size 1 @@ -380,7 +383,7 @@ cases: modes: - local - performance - cliCommand: set --repos-without-explicit-perms --apply --no-backup --parallelism 1 + cliCommand: set --repos-without-explicit-perms --apply --no-backup expectedMutations: 1 set-repos-created-after: @@ -389,7 +392,7 @@ cases: the date. modes: - local - cliCommand: set --repos-created-after 2026-02-01 --apply --no-backup --parallelism 1 + cliCommand: set --repos-created-after 2026-02-01 --apply --no-backup expectedMutations: 1 full-overwrite-dry-run: @@ -398,7 +401,7 @@ cases: and leaves state untouched — the dry-run default never mutates. modes: - local - cliCommand: set --full --no-backup --parallelism 1 + cliCommand: set --full --no-backup expectedMutations: 0 empty-maps-noop: @@ -407,7 +410,7 @@ cases: grants untouched. 
modes: - local - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup expectedMutations: 0 match-provider-and-host-fields: @@ -417,7 +420,7 @@ cases: url/displayName/username get grants; the builtin-only user does not. modes: - local - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup expectedMutations: 2 add-users-by-email-and-list: @@ -429,7 +432,7 @@ cases: cliCommand: >- set --users marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com,test_user_09993 - --apply --no-backup --parallelism 1 + --apply --no-backup expectedMutations: 4 restore-dry-run-noop: @@ -440,7 +443,7 @@ cases: - local cliCommand: >- restore --restore-path tests/e2e/fixtures/restore-dry-run-noop/snapshot.json - --no-backup --parallelism 1 + --no-backup expectedMutations: 0 restore-applies-snapshot: @@ -451,7 +454,7 @@ cases: - local cliCommand: >- restore --restore-path tests/e2e/fixtures/restore-applies-snapshot/snapshot.json - --apply --no-backup --parallelism 1 + --apply --no-backup expectedMutations: 1 full-overwrite-with-backup: @@ -460,7 +463,7 @@ cases: snapshots and validates the apply without changing the outcome. 
modes: - local - cliCommand: set --full --apply --parallelism 1 + cliCommand: set --full --apply expectedMutations: 1 get-user-grants: @@ -588,7 +591,7 @@ cases: modes: - local - live - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup expectedMutations: 0 set-repos-filter: @@ -601,7 +604,7 @@ cases: - live - performance cliCommand: >- - set --repos test-repo-49971 --apply --no-backup --parallelism 1 + set --repos test-repo-49971 --apply --no-backup --src-log-level DEBUG expectedMutations: 1 @@ -612,14 +615,13 @@ cases: modes: - local - live - cliCommand: set --users test_user_09992 --apply --no-backup --parallelism 1 + cliCommand: set --users test_user_09992 --apply --no-backup importConfig: command: set users: - test_user_09992 apply: true no_backup: true - parallelism: 1 expectedMutations: 2 full-overwrite-removes-stale-grant: @@ -629,7 +631,7 @@ cases: modes: - local - live - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup expectedMutations: 1 # ── Live only: real-instance validation and organization sync ── @@ -713,7 +715,7 @@ cases: modes: - live - performance - cliCommand: set --created-after 2099-01-01 --apply --no-backup --parallelism 1 + cliCommand: set --created-after 2099-01-01 --apply --no-backup expectedMutations: 0 set-repos-created-after-noop: @@ -723,7 +725,7 @@ cases: modes: - live - performance - cliCommand: set --repos-created-after 2099-01-01 --apply --no-backup --parallelism 1 + cliCommand: set --repos-created-after 2099-01-01 --apply --no-backup expectedMutations: 0 sync-saml-orgs-dry-run: @@ -823,13 +825,12 @@ cases: - local - live - performance - cliCommand: set --full --apply --no-backup --parallelism 1 + cliCommand: set --full --apply --no-backup importConfig: command: set full: true apply: true no_backup: true - parallelism: 1 expectedMutations: 2 get-user-baseline: From ae997281f8fb189e1450f919c6c00163a07fb4dc Mon Sep 17 
00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Wed, 10 Jun 2026 20:27:30 -0600 Subject: [PATCH 6/9] Separate functional (live) from scale (performance) testing Live tier is now the fast FUNCTIONAL tier (~10 min, was ~87 min): - Seed/restore involved repos directly via GraphQL instead of the product's restore command (which full-captures 10k users per call and whose whole-instance semantics clobber concurrent runs) - Demote intrinsically whole-instance cases to the performance tier: get-repos-filter, set-full/set-repos/set-users-without-perms sync-saml-orgs dry-runs (each measured ~400 s) - Make the legacy whole-instance stress cycle opt-in only (--live "full cycle"); it crashed the test instance's Postgres (1,150 repo overwrites x 10,002 bindIDs at parallelism 16) Catch scale bugs with small data: - FakeSourcegraphClient caps site-user pages at 2, so 4-user fixtures exercise multi-page candidate selection locally; verified it catches the 2026-06-10 first-page truncation bug in milliseconds Product fixes surfaced by the split: - Additive applies now emit mutations_succeeded to the run log (harness mutation-count checks previously read 0) - capture_explicit_grants short-circuits per-user lookups when the repo selection is empty, while still draining the recording user stream that feeds mapping and SAML extraction Amp-Thread-ID: https://ampcode.com/threads/T-019eaec8-b78c-7386-b977-d93720ad3219 Co-authored-by: Amp --- dev/TODO.md | 10 + .../permissions/command.py | 9 + .../permissions/snapshot.py | 8 + tests/README.md | 40 ++- tests/e2e/case_runner.py | 13 +- .../after.json | 26 ++ .../before.json | 26 ++ tests/run.py | 231 +++++++++--------- tests/tests.yaml | 38 +-- tests/unit/test_snapshot.py | 26 ++ 10 files changed, 280 insertions(+), 147 deletions(-) diff --git a/dev/TODO.md b/dev/TODO.md index 81e084f..29d1e96 100644 --- a/dev/TODO.md +++ b/dev/TODO.md @@ -23,12 +23,22 @@ - Use the stress-run evidence in 
[memory-efficiency.md](./memory-efficiency.md) to request Sourcegraph bulk explicit-permission read and write APIs. + New evidence 2026-06-10: the whole-instance apply (1,150 repo + overwrites x 10,002 bindIDs each at parallelism 16) crashed the test + instance's Postgres ("connection refused", "unexpected EOF"); the + client circuit breaker opened and the harness restored cleanly. That + stress cycle is now opt-in: `uv run tests/run.py --live "full cycle"`. - Add an explicit destructive/performance-test mode to the e2e runner so giant stress runs can skip or defer full restore cleanup when the goal is finding the server-side breaking point. - Revisit full snapshot capture once Sourcegraph exposes a bulk read path; replace aliased `User.permissionsInfo.repositories(source: API)` calls before raising concurrency further. +- `get --repos <repo>` still scans every user's explicit grants to find one + repo's holders (~400 s at 10k users). A repo-centric read + (`repository.permissionsInfo.users` + site-admin disambiguation, as the + test harness already does) would make it seconds — see the repo-centric + section below. ## Low priority: Repo-centric path, when users > repos, or for cross-checking diff --git a/src/src_auth_perms_sync/permissions/command.py b/src/src_auth_perms_sync/permissions/command.py index 42f4b28..842f137 100644 --- a/src/src_auth_perms_sync/permissions/command.py +++ b/src/src_auth_perms_sync/permissions/command.py @@ -1344,6 +1344,15 @@ def _apply_additive_permissions( mutations.failed, mutations.canceled, ) + # Structured counts, mirroring the full-set and restore command events so + # every apply path reports mutations_succeeded in the run log. 
+ src.info( + "additive_apply_done", + mutations_succeeded=mutations.succeeded, + mutations_skipped=mutations.skipped, + mutations_failed=mutations.failed, + mutations_canceled=mutations.canceled, + ) return mutations diff --git a/src/src_auth_perms_sync/permissions/snapshot.py b/src/src_auth_perms_sync/permissions/snapshot.py index a22f5c6..c7b35e8 100644 --- a/src/src_auth_perms_sync/permissions/snapshot.py +++ b/src/src_auth_perms_sync/permissions/snapshot.py @@ -212,6 +212,14 @@ def capture_explicit_grants( that need the user-count statistic don't have to materialize the iterator twice or measure it themselves. """ + if selected_repository_ids is not None and not selected_repository_ids: + # No repos selected (e.g. --repos-created-after matched nothing): no + # per-user permission lookup could contribute anything, so skip them. + # Still drain the users iterable — callers pass recording streams + # whose side effects feed later phases (mapping, SAML extraction). + log.info("No repositories selected — skipping the explicit-permissions lookups.") + return {}, sum(1 for _ in users) + # Invert directly as each per-user fetch completes. Store only repo IDs # first, then hydrate each unique repo name once after all users complete. usernames_by_repository_id: dict[str, list[str]] = {} diff --git a/tests/README.md b/tests/README.md index 40dd288..b2b43f1 100644 --- a/tests/README.md +++ b/tests/README.md @@ -56,17 +56,35 @@ files (e.g. `test_user_09991`, `test-repo-49981`), and exact selectors only resulting state against `after.json`. Replay-style cases (`expectedExitCode`/`expectedOutput`) assert parser behavior instead and need no files. -- **live** — runs `cliCommand` against the `.env` test instance. Read-only - commands assert exit code and output. Mutating `set --apply` commands run - the full cycle: seed the `before.json` state onto the involved repos, run, - verify the result with an independent GraphQL read-back, then restore the - original state. 
Cases may declare `live.involvedRepos` (extra repos to - capture/seed/restore; the ones absent from `after.json` are canaries that - must come back unchanged — this is how widened regex selectors get caught) - and `live.usersWithoutOtherGrants` (preflight: named users must hold no - grants outside the involved repos). -- **performance** — same as live, but timed and measured (traces, RSS - sampling, TSV row). +- **live** — FUNCTIONAL tier: fast, scoped checks against the `.env` test + instance; the whole tier should take minutes. Read-only commands assert + exit code and output. Mutating `set --apply` commands run the full cycle: + seed the `before.json` state onto the involved repos, run, verify the + result with an independent GraphQL read-back, then restore the original + state. Seeding and restoring write the involved repos directly via + GraphQL — never through the product's `restore` command, whose full + instance capture takes minutes at 10k users and whose whole-instance + semantics clobber concurrent runs. Cases may declare `live.involvedRepos` + (extra repos to read/seed/restore; the ones absent from `after.json` are + canaries that must come back unchanged — this is how widened regex + selectors get caught) and `live.usersWithoutOtherGrants` (preflight: + named users must hold no grants outside the involved repos). Cases whose + main command intrinsically scans the whole instance (full captures, + candidate scans over all users/repos) belong in **performance**, not + live. +- **performance** — SCALE tier: same workflow as live, but timed and + measured (traces, RSS sampling, TSV row), and the place for cases whose + commands walk all 10k users / 50k repos. Run deliberately, not + pre-commit. The legacy whole-instance stress cycle (`set --full` with the + root maps.yaml — 10k users x ~1,150 repos, known to crash the test + instance's Postgres) is opt-in only: `uv run tests/run.py --live "full + cycle"`. 
+ +Functional coverage of scale-only code paths (pagination, batch stepping, +dedupe) does NOT require scale data: the local fake serves site-user pages +of at most 2 (`SITE_USERS_PAGE_CAP` in `e2e/case_runner.py`), so a fixture +with 4 users already spans 2 pages — that is what catches selection +truncation bugs locally in milliseconds. ## PyPI install smoke (`--install`) diff --git a/tests/e2e/case_runner.py b/tests/e2e/case_runner.py index 46cf575..9d67158 100644 --- a/tests/e2e/case_runner.py +++ b/tests/e2e/case_runner.py @@ -29,6 +29,11 @@ from src_auth_perms_sync.shared import types as shared_types FIXTURES_DIR = Path(__file__).with_name("fixtures") + +# Maximum site-users page width the fake serves, regardless of the requested +# limit. Small enough that fixtures with a handful of users span multiple +# pages, so pagination handling is functionally tested without scale data. +SITE_USERS_PAGE_CAP = 2 E2E_TESTS_PATH = Path(__file__).resolve().parents[1] / "tests.yaml" DEFAULT_CASE_MODES = ["local"] SITE_CONFIG = json.dumps( @@ -437,7 +442,13 @@ def _site_users(self, variables: dict[str, object]) -> dict[str, Any]: if created_after is None or user["createdAt"] >= created_after ] offset = self._integer_variable(variables, "offset") - limit = self._integer_variable(variables, "limit") + # Serve pages no wider than SITE_USERS_PAGE_CAP regardless of the + # requested limit, mimicking a server-side nodes(limit:) cap. This + # makes every local fixture with >2 users exercise multi-page + # candidate selection (offset stepping, dedupe, the sequential + # paging branch). The 2026-06-10 first-page-only truncation bug is + # invisible to local tests without this. 
+ limit = min(self._integer_variable(variables, "limit"), SITE_USERS_PAGE_CAP) nodes = [ { "id": self._user_graphql_id(user["id"]), diff --git a/tests/e2e/fixtures/set-users-without-explicit-perms/after.json b/tests/e2e/fixtures/set-users-without-explicit-perms/after.json index 29ea002..069ddc1 100644 --- a/tests/e2e/fixtures/set-users-without-explicit-perms/after.json +++ b/tests/e2e/fixtures/set-users-without-explicit-perms/after.json @@ -33,6 +33,32 @@ ], "externalAccounts": [] }, + { + "id": 3, + "username": "test_user_09963", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09963-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 4, + "username": "test_user_09964", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09964-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, { "id": 2, "username": "test_user_09962", diff --git a/tests/e2e/fixtures/set-users-without-explicit-perms/before.json b/tests/e2e/fixtures/set-users-without-explicit-perms/before.json index cd010a7..60aac3e 100644 --- a/tests/e2e/fixtures/set-users-without-explicit-perms/before.json +++ b/tests/e2e/fixtures/set-users-without-explicit-perms/before.json @@ -33,6 +33,32 @@ ], "externalAccounts": [] }, + { + "id": 3, + "username": "test_user_09963", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09963-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 4, + "username": "test_user_09964", + "builtinAuth": true, + "createdAt": "2026-01-03T00:00:00Z", + "emails": [ + { + "email": "marc.leblanc+test09964-m.eks.m.ps.sgdev.org@sourcegraph.com", + "verified": true + } + ], + "externalAccounts": [] + }, { "id": 2, "username": "test_user_09962", diff --git a/tests/run.py 
b/tests/run.py index a1c5fc5..4872457 100644 --- a/tests/run.py +++ b/tests/run.py @@ -107,6 +107,14 @@ } """ +SET_REPOSITORY_PERMISSIONS_MUTATION = """ +mutation TestSetRepositoryPermissions($repository: ID!, $userPermissions: [UserPermissionInput!]!) { + setRepositoryPermissionsForUsers(repository: $repository, userPermissions: $userPermissions) { + alwaysNil + } +} +""" + EXPLICIT_API_PERMISSION_REASON = "Explicit API" SITE_ADMIN_PERMISSION_REASON = "Site Admin" @@ -748,6 +756,16 @@ def test_selected(self, *names: str) -> bool: """ if not self.arguments.test_filter: return True + return self.explicitly_selected(*names) + + def explicitly_selected(self, *names: str) -> bool: + """Return whether a filter token names one of `names`. + + Unlike `test_selected`, returns False when no filter was given — + for checks that must be opt-in (instance-wide stress runs). + """ + if not self.arguments.test_filter: + return False return any( token.lower() in name.lower() for token in self.arguments.test_filter for name in names ) @@ -1333,11 +1351,14 @@ def run_seeded_fixture_apply( """Seed the case's before-state, run it with --apply, verify, restore. Every involved repo — fixture state repos, exact rule names, and any - declared `live.involvedRepos` — is captured, seeded, verified, and - restored. Involved repos absent from after.json are canaries: they - are seeded to their before-state (empty when undeclared) and must - read back unchanged, which catches selectors matching wider than the - case intends. + declared `live.involvedRepos` — is read, seeded, verified, and + restored, all SCOPED to those repos via direct GraphQL (seconds), + never through the product's restore command (which performs a full + instance capture: minutes at 10k users, and whole-instance restore + semantics that clobber concurrently-running cases). 
Involved repos + absent from after.json are canaries: they are seeded to their + before-state (empty when undeclared) and must read back unchanged, + which catches selectors matching wider than the case intends. """ from tests.e2e.case_runner import case_cli_arguments @@ -1407,78 +1428,91 @@ def run_seeded_fixture_apply( name: after_grants.get(name, before_grants.get(name, set())) for name in involved_names } - with tempfile.TemporaryDirectory(prefix=f"src-auth-perms-sync-live-{case_name}-") as tmp: - seed_path = Path(tmp) / "seed-before.json" - cleanup_path = Path(tmp) / "cleanup.json" - write_state_snapshot( - seed_path, - self.endpoint, + try: + seeded = self.set_repository_states( + f"{label} [seed before-state]", + level, { - name: (repository_ids[name], sorted(before_grants.get(name, set()))) + name: (repository_ids[name], before_grants.get(name, set())) for name in involved_names }, ) - write_state_snapshot( - cleanup_path, - self.endpoint, - { - name: (repository_ids[name], sorted(original_state[name][1])) - for name in involved_names - }, + if not seeded: + return + self.check_repository_states( + f"{label} [seed verified]", + level, + {name: before_grants.get(name, set()) for name in involved_names}, ) - try: - self.run_cli_case( - CliCase( - f"{label} [seed before-state]", - restore_arguments(seed_path), - 0, - must_contain_one_of=RESTORE_SUCCESS_MARKERS, - ), - environment, - level=level, - ) - self.check_repository_states( - f"{label} [seed verified]", + + main_case = CliCase( + label, + tuple(case_cli_arguments(cast("Any", case), case_name)), + 1 if expected_errors else 0, + expected_errors, + ) + if run_main_case is not None: + result = run_main_case(main_case) + else: + result = self.run_cli_case(main_case, environment, level=level) + if expected_mutations is not None: + actual_mutations = mutations_succeeded_from_log(result.log_path) or 0 + self.record( + f"{label} [mutation count]", level, - {name: before_grants.get(name, set()) for name in 
involved_names}, + actual_mutations == expected_mutations, + 0.0, + f"expected {expected_mutations}, got {actual_mutations}", ) + self.check_repository_states(f"{label} [state verified]", level, expected_after) + finally: + self.set_repository_states( + f"{label} [restore original state]", + level, + original_state, + ) + self.check_repository_states( + f"{label} [restore verified]", + level, + {name: state[1] for name, state in original_state.items()}, + ) - main_case = CliCase( - label, - tuple(case_cli_arguments(cast("Any", case), case_name)), - 1 if expected_errors else 0, - expected_errors, - ) - if run_main_case is not None: - result = run_main_case(main_case) - else: - result = self.run_cli_case(main_case, environment, level=level) - if expected_mutations is not None: - actual_mutations = mutations_succeeded_from_log(result.log_path) or 0 - self.record( - f"{label} [mutation count]", - level, - actual_mutations == expected_mutations, - 0.0, - f"expected {expected_mutations}, got {actual_mutations}", - ) - self.check_repository_states(f"{label} [state verified]", level, expected_after) - finally: - self.run_cli_case( - CliCase( - f"{label} [restore original state]", - restore_arguments(cleanup_path), - 0, - must_contain_one_of=RESTORE_SUCCESS_MARKERS, - ), - environment, - level=level, - ) - self.check_repository_states( - f"{label} [restore verified]", - level, - {name: state[1] for name, state in original_state.items()}, + def set_repository_states( + self, name: str, level: str, target_grants: dict[str, tuple[int, set[str]]] + ) -> bool: + """Directly overwrite involved repos' explicit users via GraphQL. + + Scoped replacement for seeding/restoring through the product's + `restore` command, which always performs a full instance capture + (~minutes at 10k users) even for a two-repo snapshot. Writing the + involved repos directly keeps live functional cases scoped to + seconds, and keeps concurrent cases from clobbering each other's + repos. 
Returns True when every repo was written. + """ + started = time.monotonic() + failures: list[str] = [] + for repository_name, (database_id, usernames) in sorted(target_grants.items()): + try: + self.graphql( + SET_REPOSITORY_PERMISSIONS_MUTATION, + { + "repository": encode_repository_node_id(database_id), + "userPermissions": [ + {"bindID": username, "permission": "READ"} + for username in sorted(usernames) + ], + }, ) + except Exception as exception: + failures.append(f"{repository_name}: {exception}") + self.record( + name, + level, + not failures, + time.monotonic() - started, + "; ".join(failures) if failures else f"{len(target_grants)} repo(s) written", + ) + return not failures def check_repository_states( self, name: str, level: str, expected_grants: dict[str, set[str]] @@ -1508,7 +1542,11 @@ def run_live_permission_cycles(self, environment: dict[str, str]) -> None: # The baseline get is a prerequisite for both cycles, so it runs when # any of them is selected. want_user_cycle = self.test_selected("live: set --users apply", "user cycle") - want_full_cycle = self.test_selected("live: set --full", "full cycle") + # The full cycle applies the ROOT maps.yaml to the whole instance + # (10k users x ~1,150 repos) — an instance-wide stress run that has + # crashed the test instance's Postgres. 
Opt-in only: + # uv run tests/run.py --live "full cycle" + want_full_cycle = self.explicitly_selected("live: set --full", "full cycle") want_baseline = ( self.test_selected("live: get user baseline", "baseline") or want_user_cycle @@ -2365,14 +2403,9 @@ def run_property_checks(seed: int, iterations: int) -> list[PropertyCheckOutcome # --------------------------------------------------------------------------- -# Live fixture-case helpers: identity translation, seed/cleanup snapshots +# Live fixture-case helpers: identity translation, fixture-state loading # --------------------------------------------------------------------------- -RESTORE_SUCCESS_MARKERS = ( - "VALIDATION OK", - "Restore done", - "Nothing to restore", -) EXACT_REPOSITORY_SELECTOR_FIELDS = {"names"} @@ -2425,49 +2458,6 @@ def fixture_maps_repo_scope( return (rule_repository_names, "") -def write_state_snapshot( - path: Path, endpoint: str, grants: dict[str, tuple[int, list[str]]] -) -> None: - """Write a restore-compatible snapshot file describing exact repo states.""" - repos = { - str(repository_id): {"name": repository_name, "users": usernames} - for repository_name, (repository_id, usernames) in sorted(grants.items()) - } - users_with_grants = {username for _, usernames in grants.values() for username in usernames} - snapshot: dict[str, Any] = { - "schema_version": 5, - "captured_at": datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds"), - "endpoint": endpoint, - "bindID_mode": "USERNAME", - "config_file": None, - "config_sha256": None, - "pending_bindIDs": [], - "stats": { - "total_users_scanned": len(users_with_grants), - "users_with_explicit_grants": len(users_with_grants), - "repos_with_explicit_grants": sum(1 for _, usernames in grants.values() if usernames), - "total_grants": sum(len(usernames) for _, usernames in grants.values()), - }, - "repos": repos, - } - path.write_text(json.dumps(snapshot, indent=2) + "\n", encoding="utf-8") - - -def restore_arguments(snapshot_path: 
Path) -> tuple[str, ...]: - # Parallelism 8: the dominant cost of seed/cleanup restores is the full - # explicit-permissions capture (10k users in batches), which serializes - # painfully at parallelism 1; the mutation counts here are tiny. - return ( - "restore", - "--restore-path", - str(snapshot_path), - "--apply", - "--no-backup", - "--parallelism", - "8", - ) - - def decode_repository_node_id(graphql_id: str) -> int: """Decode a base64 GraphQL Repository node ID to its integer database ID.""" decoded = base64.b64decode(graphql_id, validate=True).decode() @@ -2477,6 +2467,11 @@ def decode_repository_node_id(graphql_id: str) -> int: return int(database_id) +def encode_repository_node_id(database_id: int) -> str: + """Encode an integer database ID as a base64 GraphQL Repository node ID.""" + return base64.b64encode(f"Repository:{database_id}".encode()).decode() + + def mutations_succeeded_from_log(log_path: Path | None) -> int | None: """Return the last mutations_succeeded count from a run's structured log.""" if log_path is None or not log_path.is_file(): diff --git a/tests/tests.yaml b/tests/tests.yaml index f6bb836..8d5186e 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -354,24 +354,21 @@ cases: set-users-without-explicit-perms: description: >- --users-without-explicit-perms additively grants mapped repos only to - users who currently hold no explicit grants anywhere. Live, the - preflight asserts the named users hold no grants outside the involved - repos, which makes the selection deterministic. Also pins + users who currently hold no explicit grants anywhere. The candidate + selection scans every site user by design, so the real-instance run + lives in the performance tier; the local fixture spans multiple + site-user pages (page cap 2), which is what catches selection + truncation bugs like the 2026-06-10 first-page-only one. Also pins --explicit-permissions-batch-size by forcing the smallest batch. 
modes: - local - - live - performance live: usersWithoutOtherGrants: - test_user_09961 - test_user_09962 - # Deliberately --parallelism 1: keeps live coverage of the SEQUENTIAL - # candidate-selection code path, where the 2026-06-10 first-page-only - # truncation bug lived. Every other case uses the CLI default so the - # suite exercises what production runs actually use. cliCommand: >- - set --users-without-explicit-perms --apply --no-backup --parallelism 1 + set --users-without-explicit-perms --apply --no-backup --explicit-permissions-batch-size 1 expectedMutations: 2 @@ -689,9 +686,12 @@ cases: - Selected 0 user(s) for get output. get-repos-filter: - description: get scoped to one repo by exact name. + description: >- + get scoped to one repo by exact name. The snapshot still scans every + user's explicit grants to find the repo's holders (measured ~400 s at + 10k users), so it runs in the performance tier. modes: - - live + - performance cliCommand: get --repos test-repo-49981 expectedExitCode: 0 expectedOutput: @@ -753,9 +753,10 @@ cases: set-full-sync-saml-orgs-dry-run: description: >- Combined permission + organization sync dispatch, full mode, dry run - only. + only. The dry-run before-capture scans every user (measured ~395 s), + so it runs in the performance tier. modes: - - live + - performance cliCommand: set --full --sync-saml-orgs expectedExitCode: 0 expectedOutput: @@ -775,9 +776,10 @@ cases: set-repos-sync-saml-orgs-dry-run: description: >- Combined permission + organization sync dispatch, repo-scoped, dry run - only. + only. The dry-run before-capture scans every user even for one repo + (measured ~400 s), so it runs in the performance tier. 
modes: - - live + - performance cliCommand: set --repos test-repo-49981 --sync-saml-orgs expectedExitCode: 0 expectedOutput: @@ -786,9 +788,11 @@ cases: set-users-without-perms-sync-saml-orgs-dry-run: description: >- Combined permission + organization sync dispatch, - users-without-explicit-perms mode, dry run only. + users-without-explicit-perms mode, dry run only. The unfiltered + candidate selection scans every site user, so it runs in the + performance tier. modes: - - live + - performance cliCommand: set --users-without-explicit-perms --sync-saml-orgs expectedExitCode: 0 expectedOutput: diff --git a/tests/unit/test_snapshot.py b/tests/unit/test_snapshot.py index 710133e..363c32a 100644 --- a/tests/unit/test_snapshot.py +++ b/tests/unit/test_snapshot.py @@ -135,6 +135,32 @@ def list_repo_ids( self.assertTrue(pending_counts) self.assertLessEqual(max(pending_counts), 4) + def test_capture_explicit_grants_skips_scan_when_no_repositories_selected(self) -> None: + users: list[permission_snapshot.SnapshotUser] = [ + {"id": "user-1", "username": "test_user_09991"}, + ] + + def must_not_be_called(*arguments: object, **keywords: object) -> dict[str, list[str]]: + raise AssertionError("no user lookup may run when no repos are selected") + + with patch.object( + permission_snapshot.permissions_sourcegraph, + "list_users_explicit_repo_ids", + side_effect=must_not_be_called, + ): + repos, scanned_user_count = permission_snapshot.capture_explicit_grants( + cast(src.SourcegraphClient, object()), + users, + parallelism=1, + explicit_permissions_batch_size=25, + selected_repository_ids=set(), + ) + + self.assertEqual({}, repos) + # The users iterable must still be drained: callers pass recording + # streams whose side effects feed later phases. 
+ self.assertEqual(1, scanned_user_count) + def test_capture_explicit_grants_aborts_when_circuit_breaker_opens(self) -> None: users: list[permission_snapshot.SnapshotUser] = [ {"id": f"user-{index}", "username": f"user-{index}"} for index in range(60) From 97306db4267d43d6bd79ba91171f020e9d77e18b Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Thu, 11 Jun 2026 16:56:10 -0600 Subject: [PATCH 7/9] Add tests/setup.py: converge the instance to declared synthetic state setup.yaml declares the desired test-instance state; setup.py (dry-run default, --apply converges) verifies site config and synthetic user/repo counts, rewrites legacy marc.leblanc+ addresses to {username}@perms-sync.test (10,000 rewritten), fabricates SAML external accounts with synthetic groups via SQL on the pgsql pod (verified back through the product's own accountData parser), deletes orphaned explicit grants (60,006 rows on soft-deleted repos found and removed), and clears pending permissions. New live coverage built on those fixtures: - saml-group-live: samlGroup selector against fabricated accounts, with sales-only and no-group canaries; same fixture asserts identical behavior locally (39 s live) - set-created-after-temp-user: harness creates a temp user via createUser, {today} substitution makes --created-after select exactly it out of 10k users, then hard-deletes it (13 s live) - live.requiredSamlGroups preflight points at setup.py on drift - live hygiene bookends: pending bindIDs must be empty at tier start/end All fixtures migrated off marc.leblanc+...@sourcegraph.com addresses (49 files); regex-filters-scope now matches ^test_user_0999[12]@... 
Amp-Thread-ID: https://ampcode.com/threads/T-019eaec8-b78c-7386-b977-d93720ad3219 Co-authored-by: Amp --- AGENTS.md | 5 + dev/TODO.md | 18 +- dev/engineering-requests.md | 270 ++ .../mapping-efficiency.md | 0 .../memory-efficiency-analyze.py | 0 .../memory-efficiency-generate.py | 0 .../memory-efficiency-monitor-sourcegraph.sh | 0 dev/memory-efficiency.md | 257 +- dev/test-end-to-end.py | 3047 ----------------- tests/README.md | 28 + .../add-users-by-email-and-list/after.json | 6 +- .../add-users-by-email-and-list/before.json | 6 +- .../add-users-preserves-existing/after.json | 6 +- .../add-users-preserves-existing/before.json | 6 +- .../fixtures/and-filters-intersect/after.json | 4 +- .../and-filters-intersect/before.json | 4 +- .../fixtures/and-filters-intersect/maps.yaml | 2 +- .../e2e/fixtures/empty-maps-noop/before.json | 2 +- .../full-overwrite-dry-run/before.json | 6 +- .../after.json | 6 +- .../before.json | 6 +- .../fixtures/full-overwrite-unions/after.json | 6 +- .../full-overwrite-unions/before.json | 6 +- .../full-overwrite-with-backup/after.json | 6 +- .../full-overwrite-with-backup/before.json | 6 +- .../fixtures/get-full-snapshot/before.json | 2 +- .../before.json | 2 +- .../e2e/fixtures/get-user-grants/before.json | 2 +- .../fixtures/invalid-bad-regex/before.json | 2 +- .../invalid-missing-repos-section/before.json | 2 +- .../before.json | 2 +- .../before.json | 2 +- .../before.json | 2 +- .../invalid-set-unknown-repo/before.json | 2 +- .../invalid-set-unknown-user/before.json | 2 +- .../before.json | 2 +- .../match-provider-and-host-fields/after.json | 6 +- .../before.json | 6 +- tests/e2e/fixtures/no-match-noop/before.json | 2 +- .../fixtures/regex-filters-scope/after.json | 6 +- .../fixtures/regex-filters-scope/before.json | 6 +- .../fixtures/regex-filters-scope/maps.yaml | 2 +- .../restore-applies-snapshot/after.json | 4 +- .../restore-applies-snapshot/before.json | 4 +- .../fixtures/restore-dry-run-noop/before.json | 4 +- 
.../fixtures/restore-missing-file/before.json | 2 +- .../e2e/fixtures/saml-group-filter/after.json | 6 +- .../fixtures/saml-group-filter/before.json | 6 +- tests/e2e/fixtures/saml-group-live/after.json | 181 + .../e2e/fixtures/saml-group-live/before.json | 175 + tests/e2e/fixtures/saml-group-live/maps.yaml | 10 + .../set-created-after-temp-user/after.json | 42 + .../set-created-after-temp-user/before.json | 40 + .../set-created-after-temp-user/maps.yaml | 8 + .../set-repos-created-after-noop/before.json | 2 +- .../set-repos-created-after/after.json | 4 +- .../set-repos-created-after/before.json | 4 +- .../e2e/fixtures/set-repos-filter/after.json | 4 +- .../e2e/fixtures/set-repos-filter/before.json | 4 +- .../after.json | 4 +- .../before.json | 4 +- .../set-users-created-after-noop/before.json | 2 +- .../set-users-created-after/after.json | 6 +- .../set-users-created-after/before.json | 6 +- .../after.json | 8 +- .../before.json | 8 +- tests/e2e/test_local_cases.py | 16 +- tests/run.py | 134 +- tests/setup.py | 416 +++ tests/tests.yaml | 45 +- 70 files changed, 1491 insertions(+), 3411 deletions(-) create mode 100644 dev/engineering-requests.md rename dev/{ => memory-analysis}/mapping-efficiency.md (100%) rename dev/{ => memory-analysis}/memory-efficiency-analyze.py (100%) rename dev/{ => memory-analysis}/memory-efficiency-generate.py (100%) rename dev/{ => memory-analysis}/memory-efficiency-monitor-sourcegraph.sh (100%) delete mode 100755 dev/test-end-to-end.py create mode 100644 tests/e2e/fixtures/saml-group-live/after.json create mode 100644 tests/e2e/fixtures/saml-group-live/before.json create mode 100644 tests/e2e/fixtures/saml-group-live/maps.yaml create mode 100644 tests/e2e/fixtures/set-created-after-temp-user/after.json create mode 100644 tests/e2e/fixtures/set-created-after-temp-user/before.json create mode 100644 tests/e2e/fixtures/set-created-after-temp-user/maps.yaml create mode 100644 tests/setup.py diff --git a/AGENTS.md b/AGENTS.md index 
406febb..ca79da4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,5 +1,10 @@ # AGENTS.md +## Reference materials + +- GraphQL schema and database migrations (changes to SQL schema) are available in + + ## Linting ```bash diff --git a/dev/TODO.md b/dev/TODO.md index 29d1e96..d82e06b 100644 --- a/dev/TODO.md +++ b/dev/TODO.md @@ -1,5 +1,21 @@ # TODO +## Medium priority: extend SAML-group live coverage to org sync + +tests/setup.py now fabricates SAML accounts with synthetic groups +(`perms-sync-test-eng` / `perms-sync-test-sales`, see tests/setup.yaml). +saml-group-live covers permission mapping; add a seeded +`sync-saml-orgs --apply` live case that maps those groups to a throwaway +org and asserts membership is added AND removed (today's +sync-saml-orgs-apply only covers the single real Okta user, add-only). + +## Decide: pendingBindIDs / usersWithPendingPermissions + +The CLI cannot create pending permissions (it validates users exist), but +snapshots record `pending_bindIDs` and setup.py clears leftovers. Decide +whether "grant before first login" is a customer need; if not, consider +dropping the snapshot field. See the thread discussion 2026-06-11. + ## High priority: Remote trigger on demand - Sourcegraph webhook for new user coming in v7.4.0 @@ -21,7 +37,7 @@ ## High priority: Reduce worst-case full-permission sync load - Use the stress-run evidence in - [memory-efficiency.md](./memory-efficiency.md) + [engineering-requests.md](./engineering-requests.md) to request Sourcegraph bulk explicit-permission read and write APIs. New evidence 2026-06-10: the whole-instance apply (1,150 repo overwrites x 10,002 bindIDs each at parallelism 16) crashed the test diff --git a/dev/engineering-requests.md b/dev/engineering-requests.md new file mode 100644 index 0000000..66cde1b --- /dev/null +++ b/dev/engineering-requests.md @@ -0,0 +1,270 @@ +# Engineering requests + +Use this when opening Sourcegraph Engineering issues from memory-efficiency +evidence. 
Capture steps stay in [memory-efficiency.md](./memory-efficiency.md); +this file keeps the request-ready problem statement, evidence, proposed API +shape, and copy/paste issue text. + +## Requested Sourcegraph changes + +1. Add a bulk GraphQL read path for explicit API repository permissions. +2. Add a cheaper presence/filter path for users without explicit API repo + permissions. +3. Add Jaeger spans / metrics around the new store methods and around current + `ListUserPermissions` / `CountUserPermissions` paths. +4. Follow up with a bulk overwrite API for large full-set applies. + +## Current trace findings + +Current `src-auth-perms-sync` snapshots explicit API grants by calling +`User.permissionsInfo.repositories(source: API)` through aliased +`UserExplicitReposBatch` queries. It requests only permission repo IDs, then +hydrates names separately with `RepositoryNamesByID`. + +A focused traced batch for one user with 19 explicit repos showed per-user +fanout even when only IDs were requested: + +| User aliases | CLI request | Jaeger spans | `LoadUserPermissions` | `sql.conn.query` | +| ---: | ---: | ---: | ---: | ---: | +| 1 | 398ms | 13 | 1 | 7 | +| 25 | 508ms | 157 | 25 | 127 | +| 100 | 1,185ms | 607 | 100 | 502 | + +The second hydration query also fans out. A traced `RepositoryNamesByID` query +for 19 repos produced 46 spans, including 19 `repos.Get` spans and 22 +`sql.conn.query` spans. + +An older trace shape that resolved repository objects directly inside +`permissionsInfo.repositories` showed the per-repo resolver fanout more +dramatically: + +| Request shape | Root GraphQL span | Jaeger fanout | +| --- | ---: | --- | +| 25 user aliases, 19 explicit repos each | ~770ms | 475 `repos.Get`, 603 `sql.conn.query` | +| 100 user aliases, 19 explicit repos each | ~3,769ms | 1,900 `repos.Get`, 2,403 `sql.conn.query` | + +Together these point to Sourcegraph server-side GraphQL / DB resolver fanout, +not local Python CPU. 
Larger batches reduce request count but can increase +per-request resolver and SQL work enough to cause timeouts on the test +instance. + +One live-instance behavior is expected: if Sourcegraph returns a GraphQL +application error showing that a repo/user disappeared between planning and the +mutation, `src-auth-perms-sync` logs a skipped mutation and continues. The next +scheduled run will re-plan against the then-current users/repos. Other GraphQL +application errors still fail normally. + +## Stress-run evidence + +A prior hard stress map used about 10,001 users and about 1,000 repos, planning +roughly 10 million explicit grants. That run showed Sourcegraph-side read and +write costs were the bottleneck. `pg_stat_statements` attributed most database +time to explicit-permissions helpers: + +| Sourcegraph operation | Calls | Total time | Mean time | +| --- | ---: | ---: | ---: | +| `permsStore.ListUserPermissions` | 19,974 | 30,862.6s | 1,545ms | +| `permsStore.upsertUserRepoPermissions-range1` | 472 | 1,178.8s | 2,497ms | + +Compared with focused traces at normal scale, `ListUserPermissions` became much +slower under the large explicit-perms state. This reinforces that the CLI needs +better Sourcegraph bulk read and write APIs for very large explicit permission +sets. + +## Concurrent-operator evidence (2026-06-10) + +Four `src-auth-perms-sync` processes ran full explicit-permissions captures +concurrently against the 10k-user / 50k-repo test instance (each at +`--parallelism 8`, `--explicit-permissions-batch-size 25`), while a fifth ran a +small `set` command. Instance: single `pgsql-0` on an 8-core node. + +Observed during the concurrent captures: + +- `pgsql-0` CPU (`kubectl top`): 7,636–7,683 millicores of 8,000 (saturated). +- `frontend` / `gitserver` CPU: 124–138m / 2–3m (idle bystanders). +- `pg_stat_activity`: 29 active statements, all + `permsStore.ListUserPermissions`, **zero wait events** — pure CPU, no lock + contention. 
+- `pg_stat_statements`: `permsStore.ListUserPermissions` at 24,026 calls, + 27,635.6s total, 1,150ms mean. +- Per-client capture throughput: 23 users/sec solo → 2–4 users/sec at 4-way + concurrency. +- Aggregate throughput: 8–16 users/sec at 4-way — **below the 23 users/sec a + single client achieves alone** (negative scaling). +- ALB (CloudWatch): no 5xx, no rejected connections — the edge and frontend + are not the bottleneck. +- Collateral failure: the fifth client's queries exceeded the 60s read timeout + under this load; 5 retry attempts exhausted; its run failed with exit 1. + +Implications for the engineering request: + +- A single per-user `permissionsInfo.repositories(source: API)` read costs + roughly 0.3–0.4s of Postgres CPU at this state size (1,150ms mean execution + under contention), so one operator at modest parallelism can saturate the + database by itself, and two concurrent operators degrade each other below + single-operator throughput. +- Timeout/retry behavior amplifies the problem: once statements exceed the + client read timeout, retries re-run the same expensive queries, adding load + exactly when the database is saturated. +- A bulk read API (one query returning explicit grants for many users or for + whole repos) would replace ~10,000 × ~1s statements per capture with a + single scan, and would also make concurrent operators viable. + +## Sourcegraph codepath findings + +[Deep Search findings](https://sourcegraph.sourcegraph.com/deepsearch/52a24164-1eb3-4db1-a92d-e320ef1c7557) +from `github.com/sourcegraph/sourcegraph`: + +- Schema: `cmd/frontend/graphqlbackend/authz.graphql` exposes + `User.permissionsInfo.repositories(source: PermissionSource)`. +- `UserResolver.PermissionsInfo` enters + `cmd/frontend/internal/authz/resolvers/resolver.go` and calls + `db.Perms().LoadUserPermissions(ctx, userID)` before the repositories + connection is resolved. 
+- `userPermissionsInfoResolver.Repositories` in + `cmd/frontend/internal/authz/resolvers/permissions_info.go` uses the generic + connection resolver, so `nodes` and `totalCount` can evaluate separately. +- Each permission node's `Repository()` resolver calls `db.Repos().Get`, + creating an N+1 query pattern for repository hydration. +- Even when the client asks only for permission repo IDs, each aliased user + still runs `LoadUserPermissions` and several SQL queries. Current + `src-auth-perms-sync` then hydrates repository names separately through + `node(id)`, which also resolves as one `repos.Get` per repository ID. +- `internal/database/perms_store.go` has bulk write helpers for setting repo + permissions, but the read path uses per-user connection queries and repo + resolver fanout. + +## Proposed bulk read API + +`src-auth-perms-sync` needs to snapshot explicit API permissions for many +users. Today it calls `User.permissionsInfo.repositories(source: API)` with +GraphQL aliases. This is correct, but expensive at scale. + +Request a bulk read API for explicit permissions. GraphQL semantics make this a +query, not a mutation: + +```graphql +type ExplicitRepositoryPermission { + userID: ID! + repositoryID: ID! + repositoryName: String! + updatedAt: DateTime! +} + +extend type Query { + explicitRepositoryPermissionsForUsers( + userIDs: [ID!]! + source: PermissionSource = API + ): [ExplicitRepositoryPermission!]! +} +``` + +Back it with one SQL shape per user batch: + +```sql +SELECT urp.user_id, urp.repo_id, repo.name, urp.updated_at +FROM user_repo_permissions urp +JOIN repo ON repo.id = urp.repo_id AND repo.deleted_at IS NULL +WHERE urp.user_id = ANY($1) + AND urp.source = 'api' +ORDER BY urp.user_id, repo.name; +``` + +Important requirements: + +- Return compact scalar data, not `Repository` GraphQL objects, to avoid + per-repo resolver hydration. +- Enforce the same authorization policy as the current user permissions + resolver. 
+- Support batching / pagination for large user lists. +- Add Jaeger spans around the new store method and around existing + `ListUserPermissions` / `CountUserPermissions` so future investigations do + not require inferring work from `sql.conn.query` spans alone. + +Expected benefit: replace hundreds or thousands of per-repo resolver SQL spans +per request with one indexed `user_repo_permissions` join per user batch. + +## Proposed presence/filter API + +The `get --users-without-explicit-perms` path also needs a cheaper presence +check. Today it has to ask +`User.permissionsInfo.repositories(source: API, first: 1)` for every candidate +user, in aliased batches. Recent test runs show the client can parallelize +those batches, but the Sourcegraph frontend / load balancer can still return +502/503s under that resolver load. Add one or both direct APIs: + +```graphql +type ExplicitRepositoryPermissionPresence { + userID: ID! + hasExplicitRepositoryPermissions: Boolean! +} + +extend type Query { + explicitRepositoryPermissionPresenceForUsers( + userIDs: [ID!]! + source: PermissionSource = API + ): [ExplicitRepositoryPermissionPresence!]! + + usersWithoutExplicitRepositoryPermissions( + createdAt: DateTimeFilter + source: PermissionSource = API + first: Int + after: String + ): UserConnection! +} +``` + +Expected benefit: `src-auth-perms-sync get --users-without-explicit-perms` can +either check explicit-permission presence for candidate users in one indexed +batch query, or ask Sourcegraph for the filtered user set directly instead of +probing every user through the expensive permissions connection resolver. + +## Bulk overwrite follow-up + +The stress profile also needs attention on the write path. A purpose-built bulk +overwrite API that accepts many repo/user edges at once, streams or stages the +input server-side, and avoids repeated per-repo permission reconciliation would +make worst-case full syncs much safer. 
+ +## Copy/paste request + +Title: Add a bulk GraphQL read path for explicit repository permissions + +Problem: `src-auth-perms-sync` must snapshot explicit API repo permissions for +many users. The only current GraphQL read path is +`User.permissionsInfo.repositories(source: API)`. Current traces show this is +per-user work even when the client asks only for repo IDs: 25 aliases produced +25 `LoadUserPermissions` spans and 127 SQL spans; 100 aliases produced 100 +`LoadUserPermissions` spans and 502 SQL spans. The client must then hydrate +repository names separately; a 19-repo `RepositoryNamesByID` query produced 19 +`repos.Get` spans and 22 SQL spans. Older traces that resolved repository +objects directly inside `permissionsInfo.repositories` produced 475 `repos.Get` +spans for 25 aliases and 1,900 for 100 aliases. Larger batches and higher +concurrency therefore increase server-side resolver/SQL fanout enough to cause +timeouts instead of improving throughput. + +Request: add a bulk explicit-permissions read API that accepts many user IDs and +returns compact permission edges (`userID`, `repositoryID`, `repositoryName`, +`updatedAt`) for `source: API`, without resolving full `Repository` GraphQL +objects. A single indexed query over `user_repo_permissions` joined to `repo` +should be enough for each user batch. Also add a cheaper presence/filter path +for `get --users-without-explicit-perms`: either `userID -> has explicit API +repo permissions` for many users, or a direct query for users without explicit +API repo permissions, optionally filtered by `createdAt`. + +Acceptance criteria: + +- One request can fetch explicit API repo permissions for many users. +- The response includes repository ID and name without triggering per-repo + `db.Repos().Get` resolver calls. +- The implementation preserves current authorization checks. +- The store method and resolver have Jaeger spans/metrics that make per-batch + latency visible. 
+- `src-auth-perms-sync` can replace its aliased + `User.permissionsInfo.repositories(source: API)` calls with this API. +- `src-auth-perms-sync get --users-without-explicit-perms` can stop probing + every candidate user through `User.permissionsInfo.repositories(source: API, + first: 1)`. +- Follow-up: evaluate a bulk overwrite API for large full-set applies. The + stress run planned roughly 10 million grants and observed + `permsStore.upsertUserRepoPermissions-range1` averaging about 2.5s per call. diff --git a/dev/mapping-efficiency.md b/dev/memory-analysis/mapping-efficiency.md similarity index 100% rename from dev/mapping-efficiency.md rename to dev/memory-analysis/mapping-efficiency.md diff --git a/dev/memory-efficiency-analyze.py b/dev/memory-analysis/memory-efficiency-analyze.py similarity index 100% rename from dev/memory-efficiency-analyze.py rename to dev/memory-analysis/memory-efficiency-analyze.py diff --git a/dev/memory-efficiency-generate.py b/dev/memory-analysis/memory-efficiency-generate.py similarity index 100% rename from dev/memory-efficiency-generate.py rename to dev/memory-analysis/memory-efficiency-generate.py diff --git a/dev/memory-efficiency-monitor-sourcegraph.sh b/dev/memory-analysis/memory-efficiency-monitor-sourcegraph.sh similarity index 100% rename from dev/memory-efficiency-monitor-sourcegraph.sh rename to dev/memory-analysis/memory-efficiency-monitor-sourcegraph.sh diff --git a/dev/memory-efficiency.md b/dev/memory-efficiency.md index 8c38ec9..e7482f8 100644 --- a/dev/memory-efficiency.md +++ b/dev/memory-efficiency.md @@ -2,8 +2,9 @@ Use this when full snapshot capture or full-set apply is slow. The goal is to correlate `src-auth-perms-sync` structured logs with Sourcegraph Jaeger spans -and pod/Postgres load, then use the evidence to ask Sourcegraph engineering for -bulk explicit-permissions APIs. +and pod/Postgres load. 
Request-ready evidence and copy/paste text for +Sourcegraph Engineering live in +[engineering-requests.md](./engineering-requests.md). ## Capture a focused trace @@ -169,252 +170,8 @@ pg_stat_statements` and `pg_stat_statements_reset()` through `kubectl exec` against `pod/pgsql-0`, so statement summaries start clean for the monitored run. -## Current trace findings - -Current `src-auth-perms-sync` snapshots explicit API grants by calling -`User.permissionsInfo.repositories(source: API)` through aliased -`UserExplicitReposBatch` queries. It requests only permission repo IDs, then -hydrates names separately with `RepositoryNamesByID`. - -A focused traced batch for one user with 19 explicit repos showed per-user -fanout even when only IDs were requested: - -| User aliases | CLI request | Jaeger spans | `LoadUserPermissions` | `sql.conn.query` | -| ---: | ---: | ---: | ---: | ---: | -| 1 | 398ms | 13 | 1 | 7 | -| 25 | 508ms | 157 | 25 | 127 | -| 100 | 1,185ms | 607 | 100 | 502 | - -The second hydration query also fans out. A traced `RepositoryNamesByID` query -for 19 repos produced 46 spans, including 19 `repos.Get` spans and 22 -`sql.conn.query` spans. - -An older trace shape that resolved repository objects directly inside -`permissionsInfo.repositories` showed the per-repo resolver fanout more -dramatically: - -| Request shape | Root GraphQL span | Jaeger fanout | -| --- | ---: | --- | -| 25 user aliases, 19 explicit repos each | ~770ms | 475 `repos.Get`, 603 `sql.conn.query` | -| 100 user aliases, 19 explicit repos each | ~3,769ms | 1,900 `repos.Get`, 2,403 `sql.conn.query` | - -Together these point to Sourcegraph server-side GraphQL / DB resolver fanout, -not local Python CPU. Larger batches reduce request count but can increase -per-request resolver and SQL work enough to cause timeouts on the test -instance. 
- -One live-instance behavior is expected: if Sourcegraph returns a GraphQL -application error showing that a repo/user disappeared between planning and the -mutation, `src-auth-perms-sync` logs a skipped mutation and continues. The next -scheduled run will re-plan against the then-current users/repos. Other GraphQL -application errors still fail normally. - -## Stress-run evidence - -A prior hard stress map used about 10,001 users and about 1,000 repos, planning -roughly 10 million explicit grants. That run showed Sourcegraph-side read and -write costs were the bottleneck. `pg_stat_statements` attributed most database -time to explicit-permissions helpers: - -| Sourcegraph operation | Calls | Total time | Mean time | -| --- | ---: | ---: | ---: | -| `permsStore.ListUserPermissions` | 19,974 | 30,862.6s | 1,545ms | -| `permsStore.upsertUserRepoPermissions-range1` | 472 | 1,178.8s | 2,497ms | - -Compared with focused traces at normal scale, `ListUserPermissions` became much -slower under the large explicit-perms state. This reinforces that the CLI needs -better Sourcegraph bulk read and write APIs for very large explicit permission -sets. - -## Concurrent-operator evidence (2026-06-10) - -Four `src-auth-perms-sync` processes ran full explicit-permissions captures -concurrently against the 10k-user / 50k-repo test instance (each at -`--parallelism 8`, `--explicit-permissions-batch-size 25`), while a fifth ran -a small `set` command. Instance: single `pgsql-0` on an 8-core node. - -Observed during the concurrent captures: - -- `pgsql-0` CPU (`kubectl top`): 7,636–7,683 millicores of 8,000 (saturated). -- `frontend` / `gitserver` CPU: 124–138m / 2–3m (idle bystanders). -- `pg_stat_activity`: 29 active statements, all - `permsStore.ListUserPermissions`, **zero wait events** — pure CPU, no lock - contention. -- `pg_stat_statements`: `permsStore.ListUserPermissions` at 24,026 calls, - 27,635.6s total, 1,150ms mean. 
-- Per-client capture throughput: 23 users/sec solo → 2–4 users/sec at 4-way - concurrency. -- Aggregate throughput: 8–16 users/sec at 4-way — **below the 23 users/sec a - single client achieves alone** (negative scaling). -- ALB (CloudWatch): no 5xx, no rejected connections — the edge and frontend - are not the bottleneck. -- Collateral failure: the fifth client's queries exceeded the 60s read - timeout under this load; 5 retry attempts exhausted; its run failed with - exit 1. - -Implications for the engineering request: - -- A single per-user `permissionsInfo.repositories(source: API)` read costs - roughly 0.3–0.4s of Postgres CPU at this state size (1,150ms mean execution - under contention), so one operator at modest parallelism can saturate the - database by itself, and two concurrent operators degrade each other below - single-operator throughput. -- Timeout/retry behavior amplifies the problem: once statements exceed the - client read timeout, retries re-run the same expensive queries, adding load - exactly when the database is saturated. -- A bulk read API (one query returning explicit grants for many users or for - whole repos) would replace ~10,000 × ~1s statements per capture with a - single scan, and would also make concurrent operators viable. - -## Sourcegraph engineering request - -`src-auth-perms-sync` needs to snapshot explicit API permissions for many -users. Today it calls `User.permissionsInfo.repositories(source: API)` with -GraphQL aliases. This is correct, but expensive at scale. - -[Deep Search findings](https://sourcegraph.sourcegraph.com/deepsearch/52a24164-1eb3-4db1-a92d-e320ef1c7557) -from `github.com/sourcegraph/sourcegraph`: - -- Schema: `cmd/frontend/graphqlbackend/authz.graphql` exposes - `User.permissionsInfo.repositories(source: PermissionSource)`. 
-- `UserResolver.PermissionsInfo` enters - `cmd/frontend/internal/authz/resolvers/resolver.go` and calls - `db.Perms().LoadUserPermissions(ctx, userID)` before the repositories - connection is resolved. -- `userPermissionsInfoResolver.Repositories` in - `cmd/frontend/internal/authz/resolvers/permissions_info.go` uses the generic - connection resolver, so `nodes` and `totalCount` can evaluate separately. -- Each permission node's `Repository()` resolver calls `db.Repos().Get`, - creating an N+1 query pattern for repository hydration. -- Even when the client asks only for permission repo IDs, each aliased user - still runs `LoadUserPermissions` and several SQL queries. Current - `src-auth-perms-sync` then hydrates repository names separately through - `node(id)`, which also resolves as one `repos.Get` per repository ID. -- `internal/database/perms_store.go` has bulk write helpers for setting repo - permissions, but the read path uses per-user connection queries and repo - resolver fanout. - -Request a bulk read API for explicit permissions. GraphQL semantics make this -a query, not a mutation: - -```graphql -type ExplicitRepositoryPermission { - userID: ID! - repositoryID: ID! - repositoryName: String! - updatedAt: DateTime! -} - -extend type Query { - explicitRepositoryPermissionsForUsers( - userIDs: [ID!]! - source: PermissionSource = API - ): [ExplicitRepositoryPermission!]! -} -``` - -Back it with one SQL shape per user batch: - -```sql -SELECT urp.user_id, urp.repo_id, repo.name, urp.updated_at -FROM user_repo_permissions urp -JOIN repo ON repo.id = urp.repo_id AND repo.deleted_at IS NULL -WHERE urp.user_id = ANY($1) - AND urp.source = 'api' -ORDER BY urp.user_id, repo.name; -``` - -Important requirements: - -- Return compact scalar data, not `Repository` GraphQL objects, to avoid - per-repo resolver hydration. -- Enforce the same authorization policy as the current user permissions - resolver. -- Support batching / pagination for large user lists. 
-- Add Jaeger spans around the new store method and around existing - `ListUserPermissions` / `CountUserPermissions` so future investigations do - not require inferring work from `sql.conn.query` spans alone. - -Expected benefit: replace hundreds or thousands of per-repo resolver SQL spans -per request with one indexed `user_repo_permissions` join per user batch. - -The `get --users-without-explicit-perms` path also needs a cheaper presence -check. Today it has to ask -`User.permissionsInfo.repositories(source: API, first: 1)` for every candidate -user, in aliased batches. Recent test runs show the client can parallelize -those batches, but the Sourcegraph frontend / load balancer can still return -502/503s under that resolver load. Add one or both direct APIs: - -```graphql -type ExplicitRepositoryPermissionPresence { - userID: ID! - hasExplicitRepositoryPermissions: Boolean! -} - -extend type Query { - explicitRepositoryPermissionPresenceForUsers( - userIDs: [ID!]! - source: PermissionSource = API - ): [ExplicitRepositoryPermissionPresence!]! - - usersWithoutExplicitRepositoryPermissions( - createdAt: DateTimeFilter - source: PermissionSource = API - first: Int - after: String - ): UserConnection! -} -``` +## Engineering requests -Expected benefit: `src-auth-perms-sync get --users-without-explicit-perms` -can either check explicit-permission presence for candidate users in one indexed -batch query, or ask Sourcegraph for the filtered user set directly instead of -probing every user through the expensive permissions connection resolver. - -The stress profile also needs attention on the write path. A purpose-built -bulk overwrite API that accepts many repo/user edges at once, streams or stages -the input server-side, and avoids repeated per-repo permission reconciliation -would make worst-case full syncs much safer. 
- -## Copy/paste request - -Title: Add a bulk GraphQL read path for explicit repository permissions - -Problem: `src-auth-perms-sync` must snapshot explicit API repo permissions for -many users. The only current GraphQL read path is -`User.permissionsInfo.repositories(source: API)`. Current traces show this is -per-user work even when the client asks only for repo IDs: 25 aliases produced -25 `LoadUserPermissions` spans and 127 SQL spans; 100 aliases produced 100 -`LoadUserPermissions` spans and 502 SQL spans. The client must then hydrate -repository names separately; a 19-repo `RepositoryNamesByID` query produced 19 -`repos.Get` spans and 22 SQL spans. Older traces that resolved repository -objects directly inside `permissionsInfo.repositories` produced 475 `repos.Get` -spans for 25 aliases and 1,900 for 100 aliases. Larger batches and higher -concurrency therefore increase server-side resolver/SQL fanout enough to cause -timeouts instead of improving throughput. - -Request: add a bulk explicit-permissions read API that accepts many user IDs and -returns compact permission edges (`userID`, `repositoryID`, `repositoryName`, -`updatedAt`) for `source: API`, without resolving full `Repository` GraphQL -objects. A single indexed query over `user_repo_permissions` joined to `repo` -should be enough for each user batch. Also add a cheaper presence/filter path -for `get --users-without-explicit-perms`: either `userID -> has explicit API -repo permissions` for many users, or a direct query for users without explicit -API repo permissions, optionally filtered by `createdAt`. - -Acceptance criteria: - -- One request can fetch explicit API repo permissions for many users. -- The response includes repository ID and name without triggering per-repo - `db.Repos().Get` resolver calls. -- The implementation preserves current authorization checks. -- The store method and resolver have Jaeger spans/metrics that make per-batch - latency visible. 
-- `src-auth-perms-sync` can replace its aliased - `User.permissionsInfo.repositories(source: API)` calls with this API. -- `src-auth-perms-sync get --users-without-explicit-perms` can stop probing - every candidate user through `User.permissionsInfo.repositories(source: API, - first: 1)`. -- Follow-up: evaluate a bulk overwrite API for large full-set applies. The - stress run planned roughly 10 million grants and observed - `permsStore.upsertUserRepoPermissions-range1` averaging about 2.5s per call. +Request-ready trace findings, stress evidence, Sourcegraph codepath notes, +proposed GraphQL APIs, and copy/paste issue text now live in +[engineering-requests.md](./engineering-requests.md). diff --git a/dev/test-end-to-end.py b/dev/test-end-to-end.py deleted file mode 100755 index b6952cd..0000000 --- a/dev/test-end-to-end.py +++ /dev/null @@ -1,3047 +0,0 @@ -#!/usr/bin/env python3 -"""Run src-auth-perms-sync end-to-end cases and assert expected outcomes. - -This is an integration smoke runner for a real Sourcegraph test instance. It -uses the same CLI entrypoint an operator uses (`uv run src-auth-perms-sync`) and -checks both process exit codes and structured `run` log records. - -The script covers every major command path: read-only, dry-run, -invalid-argument, no-op apply, mutating apply, and overwrite/restore. It avoids -running the same expensive full-snapshot path more than once when another case -already covers that behavior. 
-""" - -from __future__ import annotations - -import contextlib -import csv -import datetime -import heapq -import json -import os -import re -import shlex -import signal -import statistics -import subprocess -import sys -import threading -import time -from collections.abc import Iterable, Mapping, Sequence -from concurrent.futures import Future -from concurrent.futures import wait as wait_for_futures -from dataclasses import dataclass -from pathlib import Path -from typing import Any, TextIO, cast -from urllib.parse import urlsplit - -import src_py_lib as src -from src_py_lib.clients.sourcegraph import sourcegraph_trace_from_headers, summarize_jaeger_trace - -LOG_PATH_PATTERN = re.compile(r"Writing log events to (.+?/log\.json)\.") -SAFE_PATH_PART_PATTERN = re.compile(r"[^A-Za-z0-9_.-]+") -DEFAULT_FUTURE_DATE = "2099-01-01" -REMOVED_SRC_AUTH_PERMS_SYNC_ENVIRONMENT_PREFIX = "SRC_AUTH_PERMS_SYNC_" -DEFAULT_SAMPLE_INTERVAL_SECONDS = 1.0 -DEFAULT_REPEAT_COUNT = 1 -DEFAULT_JAEGER_TRACE_LIMIT: int | None = None -DEFAULT_JAEGER_TRACE_PARALLELISM = 8 -DEFAULT_JAEGER_INITIAL_DELAY_SECONDS = 35.0 -DEFAULT_JAEGER_RETRY_DELAYS_SECONDS = ( - 2.0, - 5.0, - 10.0, - 20.0, - 30.0, - 60.0, - 60.0, - 60.0, - 60.0, - 60.0, - 60.0, -) -DEFAULT_PARALLELISM = 4 -DEFAULT_FULL_RESTORE_PARALLELISM = 1 -DEFAULT_INCLUDE_REDUNDANT_SCALE_CASES = False -DEFAULT_MEMORY_SUMMARY_LIMIT = 20 -DEFAULT_SRC_AUTH_PERMS_SYNC_COMMAND = "uv run src-auth-perms-sync" -DEFAULT_SOURCEGRAPH_MONITOR_NAMESPACE = "m" -DEFAULT_SOURCEGRAPH_MONITOR_INTERVAL_SECONDS = 5 -DEFAULT_SOURCEGRAPH_MONITOR_POSTGRES_INTERVAL_SECONDS = 10 -DEFAULT_SOURCEGRAPH_MONITOR_STATEMENTS_INTERVAL_SECONDS = 30 -DEFAULT_SOURCEGRAPH_MONITOR_FRONTEND_TARGET = "deployment/sourcegraph-frontend" -DEFAULT_SOURCEGRAPH_MONITOR_POSTGRES_TARGET = "pod/pgsql-0" -DEFAULT_SOURCEGRAPH_MONITOR_PSQL_COMMAND = "psql -X -U sg -d sg" - - -def format_jaeger_retry_delays(delays: Sequence[float]) -> str: - """Return retry delays in the format accepted by 
--jaeger-retry-delays.""" - return ",".join(f"{delay:g}" for delay in delays) - - -class EndToEndConfig(src.SourcegraphClientConfig, src.LoggingConfig): - """Config values for the end-to-end runner.""" - - src_endpoint: str = src.config_field( - default="", - env_var="SRC_ENDPOINT", - cli_flag="--src-endpoint", - cli_aliases=("--endpoint",), - metavar="URL", - help="Sourcegraph test instance URL", - required=True, - ) - src_access_token: str = src.config_field( - default="", - env_var="SRC_ACCESS_TOKEN", - cli_flag="--src-access-token", - cli_aliases=("--access-token",), - metavar="TOKEN", - help="Sourcegraph access token, or op:// secret reference", - secret=True, - required=True, - ) - src_auth_perms_sync_command: str = src.config_field( - default=DEFAULT_SRC_AUTH_PERMS_SYNC_COMMAND, - env_var="SRC_AUTH_PERMS_SYNC_E2E_COMMAND", - cli_flag="--src-auth-perms-sync-command", - help=( - "Candidate command used to invoke the CLI " - f"(default: {DEFAULT_SRC_AUTH_PERMS_SYNC_COMMAND})" - ), - ) - candidate_command: str | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_CANDIDATE_COMMAND", - cli_flag="--candidate-command", - help="Candidate command to compare; overrides --src-auth-perms-sync-command", - ) - baseline_command: str | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_BASELINE_COMMAND", - cli_flag="--baseline-command", - help="Optional baseline command. 
When set, baseline and candidate results are compared.", - ) - repeat: int = src.config_field( - default=DEFAULT_REPEAT_COUNT, - env_var="SRC_AUTH_PERMS_SYNC_E2E_REPEAT", - cli_flag="--repeat", - metavar="N", - ge=1, - help=( - "Number of times to run each command for each variant " - f"(default: {DEFAULT_REPEAT_COUNT})" - ), - ) - user: str = src.config_field( - default="", - env_var="SRC_AUTH_PERMS_SYNC_TEST_USER", - cli_flag="--user", - metavar="USER", - help="Sourcegraph user for user-scoped get/set/restore cases (default: USER)", - ) - future_date: str = src.config_field( - default=DEFAULT_FUTURE_DATE, - env_var="SRC_AUTH_PERMS_SYNC_E2E_FUTURE_DATE", - cli_flag="--future-date", - metavar="YYYY-MM-DD", - pattern=r"^\d{4}-\d{2}-\d{2}$", - help=f"YYYY-MM-DD date expected to match no users (default: {DEFAULT_FUTURE_DATE})", - ) - parallelism: int = src.config_field( - default=DEFAULT_PARALLELISM, - env_var="SRC_AUTH_PERMS_SYNC_E2E_PARALLELISM", - cli_flag="--parallelism", - metavar="N", - ge=1, - help=f"Parallelism for light mutation/no-op apply cases (default: {DEFAULT_PARALLELISM})", - ) - full_restore_parallelism: int = src.config_field( - default=DEFAULT_FULL_RESTORE_PARALLELISM, - env_var="SRC_AUTH_PERMS_SYNC_E2E_FULL_RESTORE_PARALLELISM", - cli_flag="--full-restore-parallelism", - metavar="N", - ge=1, - help=( - "Parallelism for the expensive full restore cleanup " - f"(default: {DEFAULT_FULL_RESTORE_PARALLELISM})" - ), - ) - include_redundant_scale_cases: bool = src.config_field( - default=DEFAULT_INCLUDE_REDUNDANT_SCALE_CASES, - env_var="SRC_AUTH_PERMS_SYNC_E2E_INCLUDE_REDUNDANT_SCALE_CASES", - cli_flag="--include-redundant-scale-cases", - cli_action="store_true", - help=( - "Also run older overlapping full-scale cases. Default keeps one heavy full " - "snapshot path and uses smaller cases for overlapping coverage." 
- ), - ) - allow_non_test_endpoint: bool = src.config_field( - default=False, - env_var="SRC_AUTH_PERMS_SYNC_E2E_ALLOW_NON_TEST_ENDPOINT", - cli_flag="--allow-non-test-endpoint", - cli_action="store_true", - help="Allow mutating cases outside localhost/sgdev endpoints", - ) - keep_going: bool = src.config_field( - default=False, - env_var="SRC_AUTH_PERMS_SYNC_E2E_KEEP_GOING", - cli_flag="--keep-going", - cli_action="store_true", - help="Continue after assertion failures where it is safe to do so", - ) - fetch_sg_traces: bool = src.config_field( - default=False, - env_var="SRC_AUTH_PERMS_SYNC_E2E_FETCH_SG_TRACES", - cli_flag="--fetch-sg-traces", - cli_action="store_true", - help="Pass --fetch-sg-traces to each child src-auth-perms-sync command", - ) - jaeger_trace_limit: int | None = src.config_field( - default=DEFAULT_JAEGER_TRACE_LIMIT, - env_var="SRC_AUTH_PERMS_SYNC_E2E_JAEGER_TRACE_LIMIT", - cli_flag="--jaeger-trace-limit", - metavar="N", - ge=0, - help=( - "When --fetch-sg-traces is set, fetch and summarize the N slowest GraphQL " - "Jaeger traces " - "while each child command runs; omit for all traces, set 0 to disable" - ), - ) - jaeger_trace_parallelism: int = src.config_field( - default=DEFAULT_JAEGER_TRACE_PARALLELISM, - env_var="SRC_AUTH_PERMS_SYNC_E2E_JAEGER_TRACE_PARALLELISM", - cli_flag="--jaeger-trace-parallelism", - metavar="N", - ge=1, - help=( - "Concurrent Jaeger trace fetch requests when --fetch-sg-traces is set " - f"(default: {DEFAULT_JAEGER_TRACE_PARALLELISM})" - ), - ) - jaeger_initial_delay_seconds: float = src.config_field( - default=DEFAULT_JAEGER_INITIAL_DELAY_SECONDS, - env_var="SRC_AUTH_PERMS_SYNC_E2E_JAEGER_INITIAL_DELAY_SECONDS", - cli_flag="--jaeger-initial-delay-seconds", - metavar="SECONDS", - ge=0, - help=( - "Seconds to wait before first fetching each Jaeger trace, to allow OTel tail " - f"sampling to decide (default: {DEFAULT_JAEGER_INITIAL_DELAY_SECONDS:g})" - ), - ) - jaeger_trace_jsonl: Path | None = src.config_field( - 
default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_JAEGER_TRACE_JSONL", - cli_flag="--jaeger-trace-jsonl", - metavar="PATH", - help=( - "Write Jaeger trace summaries incrementally as JSON Lines. Defaults to a sibling " - "of --results-json or --results-csv when --fetch-sg-traces is set." - ), - ) - jaeger_trace_directory: Path | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_JAEGER_TRACE_DIR", - cli_flag="--jaeger-trace-dir", - metavar="PATH", - help=( - "Directory where complete raw Jaeger trace JSON files are written. Defaults " - "to a sibling directory of --results-json or --results-csv when --fetch-sg-traces " - "is set." - ), - ) - jaeger_retry_delays: tuple[float, ...] = src.config_field( - default=DEFAULT_JAEGER_RETRY_DELAYS_SECONDS, - env_var="SRC_AUTH_PERMS_SYNC_E2E_JAEGER_RETRY_DELAYS", - cli_flag="--jaeger-retry-delays", - metavar="SECONDS[,SECONDS...]", - help=( - "Comma-separated delays between queued Jaeger trace fetch retries. " - "Each value schedules one retry after the initial fetch; add more values " - "to try for longer " - f"(default: {format_jaeger_retry_delays(DEFAULT_JAEGER_RETRY_DELAYS_SECONDS)})" - ), - ) - sample_interval: float = src.config_field( - default=DEFAULT_SAMPLE_INTERVAL_SECONDS, - env_var="SRC_AUTH_PERMS_SYNC_E2E_SAMPLE_INTERVAL", - cli_flag="--sample-interval", - metavar="SECONDS", - ge=0, - help=( - "Seconds between child resource_sample log events. The run end record always " - "includes peak_rss_mb; set 0 to disable samples. 
Default: " - f"{DEFAULT_SAMPLE_INTERVAL_SECONDS}" - ), - ) - external_sample_interval: float = src.config_field( - default=DEFAULT_SAMPLE_INTERVAL_SECONDS, - env_var="SRC_AUTH_PERMS_SYNC_E2E_EXTERNAL_SAMPLE_INTERVAL", - cli_flag="--external-sample-interval", - metavar="SECONDS", - ge=0, - help=( - "Seconds between external child process-tree RSS samples; set 0 to disable " - f"(default: {DEFAULT_SAMPLE_INTERVAL_SECONDS})" - ), - ) - memory_summary_limit: int = src.config_field( - default=DEFAULT_MEMORY_SUMMARY_LIMIT, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MEMORY_SUMMARY_LIMIT", - cli_flag="--memory-summary-limit", - metavar="N", - ge=1, - help="Number of highest-RSS cases to print in the final memory summary", - ) - results_json: Path | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_RESULTS_JSON", - cli_flag="--results-json", - metavar="PATH", - help="Optional path to write machine-readable run and comparison results as JSON", - ) - results_csv: Path | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_RESULTS_CSV", - cli_flag="--results-csv", - metavar="PATH", - help=( - "Optional path to write per-command memory results as CSV; phase rows are written " - "beside it as *-phases.csv" - ), - ) - monitor_sourcegraph_load: bool = src.config_field( - default=False, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_SOURCEGRAPH_LOAD", - cli_flag="--monitor-sourcegraph-load", - cli_action="store_true", - help=( - "Start the Sourcegraph pod/Postgres load monitor for this e2e run and write " - "its output beside the result artifacts." 
- ), - ) - sourcegraph_monitor_namespace: str = src.config_field( - default=DEFAULT_SOURCEGRAPH_MONITOR_NAMESPACE, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_NAMESPACE", - cli_flag="--monitor-namespace", - metavar="NAME", - help=( - "Kubernetes namespace for Sourcegraph load monitoring " - f"(default: {DEFAULT_SOURCEGRAPH_MONITOR_NAMESPACE})" - ), - ) - sourcegraph_monitor_output_dir: Path | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_OUTPUT_DIR", - cli_flag="--monitor-output-dir", - metavar="PATH", - help="Directory for Sourcegraph load monitor output; defaults beside result artifacts.", - ) - sourcegraph_monitor_interval_seconds: int = src.config_field( - default=DEFAULT_SOURCEGRAPH_MONITOR_INTERVAL_SECONDS, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_INTERVAL_SECONDS", - cli_flag="--monitor-interval-seconds", - metavar="SECONDS", - ge=1, - help=( - "Pod/process/cgroup monitor interval in seconds " - f"(default: {DEFAULT_SOURCEGRAPH_MONITOR_INTERVAL_SECONDS})" - ), - ) - sourcegraph_monitor_postgres_interval_seconds: int = src.config_field( - default=DEFAULT_SOURCEGRAPH_MONITOR_POSTGRES_INTERVAL_SECONDS, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_POSTGRES_INTERVAL_SECONDS", - cli_flag="--monitor-postgres-interval-seconds", - metavar="SECONDS", - ge=1, - help=( - "Postgres activity monitor interval in seconds " - f"(default: {DEFAULT_SOURCEGRAPH_MONITOR_POSTGRES_INTERVAL_SECONDS})" - ), - ) - sourcegraph_monitor_statements_interval_seconds: int = src.config_field( - default=DEFAULT_SOURCEGRAPH_MONITOR_STATEMENTS_INTERVAL_SECONDS, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_STATEMENTS_INTERVAL_SECONDS", - cli_flag="--monitor-statements-interval-seconds", - metavar="SECONDS", - ge=1, - help=( - "pg_stat_statements monitor interval in seconds " - f"(default: {DEFAULT_SOURCEGRAPH_MONITOR_STATEMENTS_INTERVAL_SECONDS})" - ), - ) - sourcegraph_monitor_frontend_target: str = src.config_field( - 
default=DEFAULT_SOURCEGRAPH_MONITOR_FRONTEND_TARGET, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_FRONTEND_TARGET", - cli_flag="--monitor-frontend-target", - metavar="TARGET", - help=( - "kubectl target for Sourcegraph frontend " - f"(default: {DEFAULT_SOURCEGRAPH_MONITOR_FRONTEND_TARGET})" - ), - ) - sourcegraph_monitor_postgres_target: str = src.config_field( - default=DEFAULT_SOURCEGRAPH_MONITOR_POSTGRES_TARGET, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_POSTGRES_TARGET", - cli_flag="--monitor-postgres-target", - metavar="TARGET", - help=( - "kubectl target for Sourcegraph Postgres " - f"(default: {DEFAULT_SOURCEGRAPH_MONITOR_POSTGRES_TARGET})" - ), - ) - sourcegraph_monitor_psql_command: str = src.config_field( - default=DEFAULT_SOURCEGRAPH_MONITOR_PSQL_COMMAND, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_PSQL_COMMAND", - cli_flag="--monitor-psql-command", - metavar="COMMAND", - help=( - "psql command to run inside the Postgres pod " - f"(default: {DEFAULT_SOURCEGRAPH_MONITOR_PSQL_COMMAND})" - ), - ) - sourcegraph_monitor_no_logs: bool = src.config_field( - default=False, - env_var="SRC_AUTH_PERMS_SYNC_E2E_MONITOR_NO_LOGS", - cli_flag="--monitor-no-logs", - cli_action="store_true", - help="Do not stream frontend logs while Sourcegraph load monitoring is enabled.", - ) - fail_on_memory_regression_percent: float | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_FAIL_ON_MEMORY_REGRESSION_PERCENT", - cli_flag="--fail-on-memory-regression-percent", - metavar="PERCENT", - ge=0, - help="Fail if candidate median peak RSS regresses by more than this percent", - ) - fail_on_memory_regression_mib: float | None = src.config_field( - default=None, - env_var="SRC_AUTH_PERMS_SYNC_E2E_FAIL_ON_MEMORY_REGRESSION_MIB", - cli_flag="--fail-on-memory-regression-mib", - metavar="MIB", - ge=0, - help="Fail if candidate median peak RSS regresses by more than this many MiB", - ) - - -@dataclass(frozen=True) -class CommandCase: - """One CLI invocation and the 
conditions it must satisfy.""" - - name: str - arguments: tuple[str, ...] - expected_exit_code: int = 0 - expected_log_command: str | None = None - expected_log_status: str | None = "ok" - must_contain: tuple[str, ...] = () - must_contain_one_of: tuple[str, ...] = () - must_not_contain: tuple[str, ...] = () - - -@dataclass(frozen=True) -class CommandResult: - """Captured result for one CLI invocation.""" - - variant: str - iteration: int - case: CommandCase - return_code: int - output: str - log_path: Path | None - run_directory: Path | None - run_record: dict[str, Any] | None - memory: MemorySummary | None - phase_memory: list[PhaseMemorySummary] - artifact_sizes: dict[str, int] - workload: dict[str, int | float | str] - jaeger_traces: list[dict[str, Any]] - elapsed_seconds: float - - -@dataclass(frozen=True) -class MemorySummary: - """Resource usage extracted from structured run logs.""" - - peak_rss_mb: float | None - sampled_peak_rss_mb: float | None - external_peak_rss_mb: float | None - resource_sample_count: int - external_sample_count: int - max_num_fds: int | None - max_num_threads: int | None - max_process_cpu_percent: float | None - - -@dataclass(frozen=True) -class PhaseMemorySummary: - """Peak RSS observed while one structured event span was active.""" - - event: str - stage: str | None - peak_rss_mb: float - sample_count: int - total_duration_ms: int - - -@dataclass(frozen=True) -class RunVariant: - """One executable variant to run through the matrix.""" - - name: str - executable: tuple[str, ...] 
- - -@dataclass(frozen=True) -class SpanInterval: - """One structured event span reconstructed from log start/end records.""" - - event: str - stage: str | None - started_at: datetime.datetime - ended_at: datetime.datetime - duration_ms: int - - -@dataclass(frozen=True) -class CaseComparison: - """Median baseline/candidate measurements for one command case.""" - - case_name: str - baseline_count: int - candidate_count: int - baseline_peak_rss_mb: float | None - candidate_peak_rss_mb: float | None - peak_rss_delta_mb: float | None - peak_rss_delta_percent: float | None - baseline_external_peak_rss_mb: float | None - candidate_external_peak_rss_mb: float | None - external_peak_rss_delta_mb: float | None - external_peak_rss_delta_percent: float | None - baseline_elapsed_seconds: float | None - candidate_elapsed_seconds: float | None - elapsed_delta_seconds: float | None - elapsed_delta_percent: float | None - - -class CommandPermutationFailure(RuntimeError): - """Raised when a command permutation does not meet its assertion.""" - - -class ExternalProcessSampler: - """Sample RSS for the child process tree from outside the CLI process.""" - - def __init__(self, root_process_identifier: int, interval_seconds: float) -> None: - self.root_process_identifier = root_process_identifier - self.interval_seconds = interval_seconds - self.peak_rss_mb: float | None = None - self.sample_count = 0 - self._stop = threading.Event() - self._thread: threading.Thread | None = None - - def start(self) -> None: - if self.interval_seconds <= 0: - return - self._thread = threading.Thread(target=self._loop, name="ExternalProcessSampler") - self._thread.daemon = True - self._thread.start() - self.sample_once() - - def stop(self) -> None: - if self.interval_seconds <= 0: - return - self.sample_once() - self._stop.set() - if self._thread is not None: - self._thread.join(timeout=2.0) - - def _loop(self) -> None: - while not self._stop.wait(self.interval_seconds): - self.sample_once() - - def 
sample_once(self) -> None: - rss_mb = process_tree_rss_mb(self.root_process_identifier) - if rss_mb is None: - return - self.sample_count += 1 - self.peak_rss_mb = max_optional_float(self.peak_rss_mb, rss_mb) - - -class SourcegraphLoadMonitor: - """Run the Sourcegraph pod/Postgres monitor for the duration of the e2e suite.""" - - def __init__(self, config: EndToEndConfig, output_dir: Path) -> None: - self.config = config - self.output_dir = output_dir - self.log_path = output_dir.with_name(f"{output_dir.name}.log") - self._log_file: TextIO | None = None - self._process: subprocess.Popen[str] | None = None - - def start(self) -> None: - script_path = sourcegraph_monitor_script_path() - if not script_path.exists(): - raise RuntimeError(f"Sourcegraph load monitor script not found: {script_path}") - self.output_dir.parent.mkdir(parents=True, exist_ok=True) - self.log_path.parent.mkdir(parents=True, exist_ok=True) - command = [ - str(script_path), - "--namespace", - self.config.sourcegraph_monitor_namespace, - "--output-dir", - str(self.output_dir), - "--interval-seconds", - str(self.config.sourcegraph_monitor_interval_seconds), - "--postgres-interval-seconds", - str(self.config.sourcegraph_monitor_postgres_interval_seconds), - "--statements-interval-seconds", - str(self.config.sourcegraph_monitor_statements_interval_seconds), - "--frontend-target", - self.config.sourcegraph_monitor_frontend_target, - "--postgres-target", - self.config.sourcegraph_monitor_postgres_target, - "--psql-command", - self.config.sourcegraph_monitor_psql_command, - ] - if self.config.sourcegraph_monitor_no_logs: - command.append("--no-logs") - print(f"Starting Sourcegraph load monitor: {self.output_dir}") - self._log_file = self.log_path.open("w", encoding="utf-8") - self._process = subprocess.Popen( # noqa: S603 - command is trusted test config. 
- command, - cwd=Path.cwd(), - stdout=self._log_file, - stderr=subprocess.STDOUT, - text=True, - start_new_session=True, - ) - self._wait_until_started() - - def stop(self) -> None: - process = self._process - if process is None: - self._close_log_file() - return - if process.poll() is None: - with contextlib.suppress(ProcessLookupError): - os.killpg(process.pid, signal.SIGTERM) - try: - process.wait(timeout=15) - except subprocess.TimeoutExpired: - with contextlib.suppress(ProcessLookupError): - os.killpg(process.pid, signal.SIGKILL) - process.wait(timeout=15) - return_code = process.returncode - self._close_log_file() - if return_code not in {0, -15, 143}: - print( - f"Sourcegraph load monitor exited with status {return_code}; see {self.log_path}", - file=sys.stderr, - ) - else: - print(f"Stopped Sourcegraph load monitor. Output: {self.output_dir}") - - def _wait_until_started(self) -> None: - process = self._process - if process is None: - return - deadline = time.monotonic() + 60 - while time.monotonic() < deadline: - if process.poll() is not None: - raise RuntimeError( - f"Sourcegraph load monitor exited before startup completed; see {self.log_path}" - ) - if self.log_path.exists() and "Started kubectl-top" in self.log_path.read_text( - encoding="utf-8", errors="ignore" - ): - return - time.sleep(0.2) - raise RuntimeError( - f"Timed out waiting for Sourcegraph load monitor startup; see {self.log_path}" - ) - - def _close_log_file(self) -> None: - if self._log_file is not None: - self._log_file.close() - self._log_file = None - - -@dataclass -class JaegerTraceFetchTask: - """One trace fetch request that can be retried across the whole e2e run.""" - - trace_request: dict[str, Any] - future: Future[dict[str, Any]] - fetch_attempts: int = 0 - first_fetch_at: str | None = None - last_fetch_at: str | None = None - - -class JaegerTraceFetchPool: - """Fetch Sourcegraph Jaeger traces through one bounded retry queue.""" - - def __init__( - self, - config: 
EndToEndConfig, - *, - parallelism: int, - initial_delay_seconds: float, - retry_delays_seconds: Sequence[float], - jsonl_path: Path | None, - trace_directory: Path | None, - ) -> None: - self.initial_delay_seconds = initial_delay_seconds - self.retry_delays_seconds = tuple(retry_delays_seconds) - self.max_fetch_attempts = len(self.retry_delays_seconds) + 1 - self._trace_directory = trace_directory - self._tasks: list[tuple[float, int, JaegerTraceFetchTask]] = [] - self._condition = threading.Condition() - self._sequence = 0 - self._closed = False - self._jsonl_file: TextIO | None = None - self._lock = threading.Lock() - http = src.HTTPClient( - user_agent="src-auth-perms-sync-e2e/0.1 (+python)", - max_attempts=1, - max_connections=parallelism, - ) - self._client = src.sourcegraph_client_from_config(config, http=http) - if jsonl_path is not None: - jsonl_path.parent.mkdir(parents=True, exist_ok=True) - self._jsonl_file = jsonl_path.open("w", encoding="utf-8") - print(f"Writing Jaeger trace summaries incrementally to {jsonl_path}") - if self._trace_directory is not None: - self._trace_directory.mkdir(parents=True, exist_ok=True) - print(f"Writing complete Jaeger traces to {self._trace_directory}") - self._workers = [ - threading.Thread( - target=self._worker, - name=f"JaegerTraceFetch-{worker_number}", - daemon=True, - ) - for worker_number in range(1, parallelism + 1) - ] - for worker in self._workers: - worker.start() - - def submit( - self, - trace_request: dict[str, Any], - collector: JaegerTraceCollector, - ) -> Future[dict[str, Any]]: - future: Future[dict[str, Any]] = Future() - future.add_done_callback(lambda completed: self._record_summary(collector, completed)) - task = JaegerTraceFetchTask( - trace_request=trace_request, - future=future, - ) - self._schedule(task, self.initial_delay_seconds) - return future - - def close(self) -> None: - with self._condition: - self._closed = True - self._condition.notify_all() - for worker in self._workers: - 
worker.join() - self._client.http.close() - if self._jsonl_file is not None: - self._jsonl_file.close() - - def _schedule(self, task: JaegerTraceFetchTask, delay_seconds: float) -> None: - with self._condition: - self._sequence += 1 - heapq.heappush( - self._tasks, - (time.monotonic() + delay_seconds, self._sequence, task), - ) - self._condition.notify() - - def _worker(self) -> None: - while True: - task = self._next_ready_task() - if task is None: - return - self._process(task) - - def _next_ready_task(self) -> JaegerTraceFetchTask | None: - with self._condition: - while True: - if self._closed and not self._tasks: - return None - if not self._tasks: - self._condition.wait() - continue - ready_at, _sequence, task = self._tasks[0] - delay_seconds = ready_at - time.monotonic() - if delay_seconds > 0: - self._condition.wait(delay_seconds) - continue - heapq.heappop(self._tasks) - return task - - def _process(self, task: JaegerTraceFetchTask) -> None: - if task.future.done(): - return - summary = self._fetch_summary(task) - if summary.get("jaeger_found") is True or not self._should_retry(task, summary): - task.future.set_result(summary) - return - self._schedule(task, self._retry_delay_seconds(task.fetch_attempts)) - - def _fetch_summary(self, task: JaegerTraceFetchTask) -> dict[str, Any]: - task.fetch_attempts += 1 - now = datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds") - if task.first_fetch_at is None: - task.first_fetch_at = now - task.last_fetch_at = now - try: - trace = sourcegraph_trace_from_request(task.trace_request) - jaeger_trace = self._client.fetch_jaeger_trace( - trace.trace_id, - retry_delays_seconds=(0.0,), - ) - summary = summarize_jaeger_trace(trace, jaeger_trace).to_json() - try: - trace_path = self._write_complete_trace(task, jaeger_trace, summary) - if trace_path is not None: - summary["jaeger_trace_path"] = str(trace_path) - except OSError as write_error: - summary["jaeger_trace_write_error"] = f"{type(write_error).__name__}: 
{write_error}" - return self._with_fetch_fields(task, summary) - except Exception as exception: # noqa: BLE001 - keep long-running evidence collection alive. - return self._with_fetch_fields( - task, - { - **task.trace_request, - "jaeger_found": False, - "error": f"{type(exception).__name__}: {exception}", - }, - ) - - def _with_fetch_fields( - self, task: JaegerTraceFetchTask, summary: dict[str, Any] - ) -> dict[str, Any]: - return { - **task.trace_request, - **summary, - "fetch_attempts": task.fetch_attempts, - "first_fetch_at": task.first_fetch_at, - "last_fetch_at": task.last_fetch_at, - "max_fetch_attempts": self.max_fetch_attempts, - } - - def _write_complete_trace( - self, - task: JaegerTraceFetchTask, - jaeger_trace: dict[str, Any], - summary: dict[str, Any], - ) -> Path | None: - if self._trace_directory is None: - return None - path = complete_jaeger_trace_path(self._trace_directory, task.trace_request) - payload = { - "collected_at": task.last_fetch_at, - "fetch_attempts": task.fetch_attempts, - "max_fetch_attempts": self.max_fetch_attempts, - "trace_request": task.trace_request, - "jaeger_summary": summary, - "jaeger_trace": jaeger_trace, - } - path.parent.mkdir(parents=True, exist_ok=True) - temporary_path = path.with_name( - f".{path.name}.tmp-{threading.get_ident()}-{time.monotonic_ns()}" - ) - temporary_path.write_text( - json.dumps(payload, indent=2, sort_keys=True) + "\n", - encoding="utf-8", - ) - temporary_path.replace(path) - return path - - def _should_retry(self, task: JaegerTraceFetchTask, summary: dict[str, Any]) -> bool: - if self._closed or task.fetch_attempts >= self.max_fetch_attempts: - return False - error = str(summary.get("error") or "") - return error.startswith(("HTTP 404", "HTTP 502", "HTTP 503", "HTTP 504")) - - def _retry_delay_seconds(self, fetch_attempts: int) -> float: - if not self.retry_delays_seconds: - return 0.0 - delay_index = min(fetch_attempts - 1, len(self.retry_delays_seconds) - 1) - return 
self.retry_delays_seconds[delay_index] - - def _record_summary( - self, - collector: JaegerTraceCollector, - future: Future[dict[str, Any]], - ) -> None: - summary = future.result() - collector.record_summary(summary) - self._write_jsonl(summary) - - def _write_jsonl(self, summary: dict[str, Any]) -> None: - if self._jsonl_file is None: - return - with self._lock: - self._jsonl_file.write(json.dumps(summary, sort_keys=True) + "\n") - self._jsonl_file.flush() - - -class JaegerTraceCollector: - """Tail a child log and submit Jaeger trace fetches while the child runs.""" - - def __init__( - self, - log_path: Path, - limit: int | None, - fetch_pool: JaegerTraceFetchPool, - *, - variant: str, - iteration: int, - case_name: str, - ) -> None: - self.log_path = log_path - self.limit = limit - self.fetch_pool = fetch_pool - self.variant = variant - self.iteration = iteration - self.case_name = case_name - self.summaries: list[dict[str, Any]] = [] - self._graphql_queries_by_span: dict[tuple[str, str], dict[str, Any]] = {} - self._trace_requests_by_graphql_span: dict[tuple[str, str], dict[str, Any]] = {} - self._requests_by_trace_id: dict[str, dict[str, Any]] = {} - self._queued_trace_ids: set[str] = set() - self._futures: list[Future[dict[str, Any]]] = [] - self._lock = threading.Lock() - self._log_complete = threading.Event() - self._started = False - self._tail_thread: threading.Thread | None = None - - def start(self) -> None: - if self._started: - return - self._started = True - scope = "all traced" if self.limit is None else f"up to {self.limit} slowest traced" - print(f"Collecting {scope} GraphQL Jaeger trace(s) for this case in the background ...") - self._tail_thread = threading.Thread( - target=self._tail_log, - name="JaegerTraceLogTail", - daemon=True, - ) - self._tail_thread.start() - - def finish_log_capture(self) -> None: - self._log_complete.set() - if self._tail_thread is not None: - self._tail_thread.join() - - def wait(self) -> None: - if not self._started: 
- return - self.finish_log_capture() - with self._lock: - futures = list(self._futures) - if futures: - wait_for_futures(futures) - with self._lock: - self.summaries.sort(key=trace_summary_duration_ms, reverse=True) - print_jaeger_trace_summaries(self.summaries) - - def record_summary(self, summary: dict[str, Any]) -> None: - with self._lock: - self.summaries.append(summary) - - def _tail_log(self) -> None: - while not self.log_path.exists(): - if self._log_complete.wait(0.1): - self._submit_limited_requests() - return - with self.log_path.open(encoding="utf-8") as log_file: - while True: - position = log_file.tell() - line = log_file.readline() - if line: - if not line.endswith("\n") and not self._log_complete.is_set(): - log_file.seek(position) - time.sleep(0.1) - continue - self._record_line(line) - continue - if self._log_complete.is_set(): - break - time.sleep(0.1) - self._submit_limited_requests() - - def _record_line(self, line: str) -> None: - if not line.strip(): - return - try: - record = json.loads(line) - except json.JSONDecodeError: - return - if not isinstance(record, dict): - return - self._record_graphql_query_metadata(cast(dict[str, Any], record)) - trace_request = graphql_trace_request_from_record(cast(dict[str, Any], record)) - if trace_request is None: - return - trace_request.update( - {"variant": self.variant, "iteration": self.iteration, "case": self.case_name} - ) - graphql_span_key = self._graphql_span_key_for_http_record(cast(dict[str, Any], record)) - trace_id = trace_request["trace_id"] - submit_request: dict[str, Any] | None = None - with self._lock: - if graphql_span_key is not None: - graphql_query = self._graphql_queries_by_span.get(graphql_span_key) - if graphql_query is not None: - trace_request["graphql_query"] = dict(graphql_query) - self._trace_requests_by_graphql_span[graphql_span_key] = trace_request - existing_request = self._requests_by_trace_id.get(trace_id) - if existing_request is None or trace_summary_duration_ms( - 
trace_request - ) > trace_summary_duration_ms(existing_request): - self._requests_by_trace_id[trace_id] = trace_request - if self.limit is None and trace_id not in self._queued_trace_ids: - self._queued_trace_ids.add(trace_id) - submit_request = trace_request - if submit_request is not None: - future = self.fetch_pool.submit(submit_request, self) - with self._lock: - self._futures.append(future) - - def _record_graphql_query_metadata(self, record: dict[str, Any]) -> None: - metadata = graphql_query_metadata_from_record(record) - if metadata is None: - return - span_key = graphql_query_span_key(record) - if span_key is None: - return - with self._lock: - existing_metadata = self._graphql_queries_by_span.get(span_key, {}) - merged_metadata = existing_metadata | metadata - self._graphql_queries_by_span[span_key] = merged_metadata - trace_request = self._trace_requests_by_graphql_span.get(span_key) - if trace_request is not None: - trace_request["graphql_query"] = dict(merged_metadata) - - @staticmethod - def _graphql_span_key_for_http_record(record: dict[str, Any]) -> tuple[str, str] | None: - trace_id = optional_string(record.get("trace")) - parent_span_id = optional_string(record.get("parent_span")) - if trace_id is None or parent_span_id is None: - return None - return trace_id, parent_span_id - - def _submit_limited_requests(self) -> None: - if self.limit is None: - return - with self._lock: - trace_requests = sorted( - self._requests_by_trace_id.values(), - key=trace_summary_duration_ms, - reverse=True, - )[: self.limit] - new_trace_requests = [ - trace_request - for trace_request in trace_requests - if trace_request["trace_id"] not in self._queued_trace_ids - ] - self._queued_trace_ids.update( - trace_request["trace_id"] for trace_request in new_trace_requests - ) - futures = [ - self.fetch_pool.submit(trace_request, self) for trace_request in new_trace_requests - ] - with self._lock: - self._futures.extend(futures) - - -class CommandPermutationRunner: - """Run 
command cases and assert CLI/log outcomes.""" - - def __init__( - self, - variant: RunVariant, - environment: dict[str, str], - *, - iteration: int, - keep_going: bool, - fetch_sg_traces: bool, - jaeger_trace_limit: int | None, - jaeger_trace_fetch_pool: JaegerTraceFetchPool | None, - sample_interval: float, - external_sample_interval: float, - ) -> None: - self.variant = variant - self.environment = environment - self.iteration = iteration - self.keep_going = keep_going - self.fetch_sg_traces = fetch_sg_traces - self.jaeger_trace_limit = jaeger_trace_limit - self.jaeger_trace_fetch_pool = jaeger_trace_fetch_pool - self.sample_interval = sample_interval - self.external_sample_interval = external_sample_interval - self.results: list[CommandResult] = [] - self.failures: list[str] = [] - self.jaeger_collectors: list[JaegerTraceCollector] = [] - - def run(self, case: CommandCase) -> CommandResult: - """Run one case, assert it, and return the captured result.""" - result = self._run_process(case) - try: - self._assert_result(result) - except CommandPermutationFailure as failure: - self.failures.append(str(failure)) - print(f"\n✗ {case.name}: {failure}", file=sys.stderr) - if not self.keep_going: - raise - else: - self.results.append(result) - print(f"✓ {case.name} ({result.elapsed_seconds:.1f}s{_memory_suffix(result.memory)})") - return result - - def _run_process(self, case: CommandCase) -> CommandResult: - full_command = [ - *self.variant.executable, - *case.arguments, - *(("--fetch-sg-traces",) if self.fetch_sg_traces else ()), - "--sample-interval", - str(self.sample_interval), - ] - print("\n" + "=" * 100) - print(f"VARIANT {self.variant.name} ITERATION {self.iteration} CASE {case.name}") - print("$ " + shlex.join(full_command)) - print("=" * 100) - - started_at = time.monotonic() - process = subprocess.Popen( - full_command, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - env=self.environment, - ) - external_sampler = 
ExternalProcessSampler(process.pid, self.external_sample_interval) - external_sampler.start() - output_lines: list[str] = [] - log_path: Path | None = None - jaeger_collector: JaegerTraceCollector | None = None - assert process.stdout is not None - for line in process.stdout: - output_lines.append(line) - print(line, end="") - if log_path is None: - log_path = _extract_log_path(line) - if log_path is not None and self.jaeger_trace_fetch_pool is not None: - jaeger_collector = JaegerTraceCollector( - log_path, - self.jaeger_trace_limit, - self.jaeger_trace_fetch_pool, - variant=self.variant.name, - iteration=self.iteration, - case_name=case.name, - ) - jaeger_collector.start() - return_code = process.wait() - external_sampler.stop() - output = "".join(output_lines) - elapsed_seconds = time.monotonic() - started_at - if log_path is None: - log_path = _extract_log_path(output) - if ( - jaeger_collector is None - and log_path is not None - and self.jaeger_trace_fetch_pool is not None - ): - jaeger_collector = JaegerTraceCollector( - log_path, - self.jaeger_trace_limit, - self.jaeger_trace_fetch_pool, - variant=self.variant.name, - iteration=self.iteration, - case_name=case.name, - ) - jaeger_collector.start() - run_record: dict[str, Any] | None = None - memory: MemorySummary | None = None - phase_memory: list[PhaseMemorySummary] = [] - artifact_sizes: dict[str, int] = {} - workload: dict[str, int | float | str] = {} - if jaeger_collector is not None: - jaeger_collector.finish_log_capture() - self.jaeger_collectors.append(jaeger_collector) - jaeger_traces = jaeger_collector.summaries - else: - jaeger_traces = [] - if log_path is not None and log_path.is_file(): - run_record, memory, phase_memory, workload = _read_run_log_summary(log_path) - artifact_sizes = artifact_sizes_for_run(log_path) - if memory is not None: - memory = MemorySummary( - peak_rss_mb=memory.peak_rss_mb, - sampled_peak_rss_mb=memory.sampled_peak_rss_mb, - 
external_peak_rss_mb=external_sampler.peak_rss_mb, - resource_sample_count=memory.resource_sample_count, - external_sample_count=external_sampler.sample_count, - max_num_fds=memory.max_num_fds, - max_num_threads=memory.max_num_threads, - max_process_cpu_percent=memory.max_process_cpu_percent, - ) - elif external_sampler.peak_rss_mb is not None: - memory = MemorySummary( - peak_rss_mb=None, - sampled_peak_rss_mb=None, - external_peak_rss_mb=external_sampler.peak_rss_mb, - resource_sample_count=0, - external_sample_count=external_sampler.sample_count, - max_num_fds=None, - max_num_threads=None, - max_process_cpu_percent=None, - ) - return CommandResult( - variant=self.variant.name, - iteration=self.iteration, - case=case, - return_code=return_code, - output=output, - log_path=log_path, - run_directory=log_path.parent if log_path is not None else None, - run_record=run_record, - memory=memory, - phase_memory=phase_memory, - artifact_sizes=artifact_sizes, - workload=workload, - jaeger_traces=jaeger_traces, - elapsed_seconds=elapsed_seconds, - ) - - def _assert_result(self, result: CommandResult) -> None: - case = result.case - if result.return_code != case.expected_exit_code: - raise CommandPermutationFailure( - f"expected exit {case.expected_exit_code}, got {result.return_code}" - ) - for substring in case.must_contain: - if substring not in result.output: - raise CommandPermutationFailure(f"output did not contain {substring!r}") - if case.must_contain_one_of and not any( - substring in result.output for substring in case.must_contain_one_of - ): - expected = ", ".join(repr(substring) for substring in case.must_contain_one_of) - raise CommandPermutationFailure(f"output did not contain any of: {expected}") - for substring in case.must_not_contain: - if substring in result.output: - raise CommandPermutationFailure(f"output unexpectedly contained {substring!r}") - if case.expected_log_command is None: - return - if result.log_path is None: - raise 
CommandPermutationFailure("command did not print a structured log path") - if result.run_record is None: - raise CommandPermutationFailure(f"{result.log_path} did not contain a run end record") - if result.run_record.get("command") != case.expected_log_command: - raise CommandPermutationFailure( - "structured log command mismatch: " - f"expected {case.expected_log_command!r}, got {result.run_record.get('command')!r}" - ) - if ( - case.expected_log_status is not None - and result.run_record.get("status") != case.expected_log_status - ): - raise CommandPermutationFailure( - "structured log status mismatch: " - f"expected {case.expected_log_status!r}, got {result.run_record.get('status')!r}" - ) - if result.run_record.get("exit_code") != case.expected_exit_code: - raise CommandPermutationFailure( - "structured log exit_code mismatch: " - f"expected {case.expected_exit_code!r}, got {result.run_record.get('exit_code')!r}" - ) - - -def main() -> None: - config = load_end_to_end_config() - logging_settings = src.logging_settings_from_config( - config, - logs_dir=Path("logs-test-end-to-end"), - ) - with src.logging( - config, - command="test_end_to_end", - git_cwd=Path.cwd(), - logging_config=logging_settings, - ): - run_end_to_end(config) - - -def load_end_to_end_config() -> EndToEndConfig: - """Load runner Config from CLI flags, environment, and .env.""" - config = src.parse_args( - EndToEndConfig, - description="Run src-auth-perms-sync end-to-end cases against a test instance.", - ) - validate_date(config.future_date, "--future-date") - if any(delay < 0 for delay in config.jaeger_retry_delays): - raise SystemExit("--jaeger-retry-delays values must be >= 0") - user = config.user or os.environ.get("SRC_AUTH_PERMS_SYNC_TEST_USER") or os.environ.get("USER") - if not user: - raise SystemExit("--user is required when SRC_AUTH_PERMS_SYNC_TEST_USER and USER are unset") - normalized_endpoint = src.normalize_sourcegraph_endpoint(config.src_endpoint) - if not 
config.allow_non_test_endpoint: - assert_test_endpoint(normalized_endpoint) - return config.model_copy(update={"src_endpoint": normalized_endpoint, "user": user}) - - -def run_end_to_end(config: EndToEndConfig) -> None: - """Run the full matrix for the loaded Config.""" - variants = run_variants(config) - environment = command_environment(config) - all_results: list[CommandResult] = [] - all_failures: list[str] = [] - all_jaeger_collectors: list[JaegerTraceCollector] = [] - jaeger_trace_fetch_pool = create_jaeger_trace_fetch_pool(config) - sourcegraph_load_monitor = create_sourcegraph_load_monitor(config) - latest_baseline_repositories: set[str] = set() - try: - if sourcegraph_load_monitor is not None: - sourcegraph_load_monitor.start() - with src.span( - "end_to_end_matrix", - repeat=config.repeat, - variant_count=len(variants), - fetch_sg_traces=config.fetch_sg_traces, - sourcegraph_load_monitor=sourcegraph_load_monitor is not None, - ) as matrix_summary: - if sourcegraph_load_monitor is not None: - matrix_summary["sourcegraph_load_monitor_dir"] = str( - sourcegraph_load_monitor.output_dir - ) - for iteration in range(1, config.repeat + 1): - for variant in variants: - with src.stage("matrix_variant", variant=variant.name, iteration=iteration): - runner = CommandPermutationRunner( - variant, - environment, - iteration=iteration, - keep_going=config.keep_going, - fetch_sg_traces=config.fetch_sg_traces, - jaeger_trace_limit=config.jaeger_trace_limit, - jaeger_trace_fetch_pool=jaeger_trace_fetch_pool, - sample_interval=config.sample_interval, - external_sample_interval=config.external_sample_interval, - ) - try: - latest_baseline_repositories = run_matrix(config, runner) - finally: - all_results.extend(runner.results) - all_failures.extend( - f"{variant.name}: {failure}" for failure in runner.failures - ) - all_jaeger_collectors.extend(runner.jaeger_collectors) - matrix_summary["case_count"] = len(all_results) - matrix_summary["failure_count"] = len(all_failures) - 
finally: - wait_for_jaeger_trace_collectors(all_jaeger_collectors) - if jaeger_trace_fetch_pool is not None: - jaeger_trace_fetch_pool.close() - if sourcegraph_load_monitor is not None: - sourcegraph_load_monitor.stop() - if all_failures: - print("\nFailures:", file=sys.stderr) - for failure in all_failures: - print(f"- {failure}", file=sys.stderr) - raise SystemExit(1) - - print("\nAll end-to-end cases passed.") - print(f"Cases passed: {len(all_results)}") - print(f"Baseline repositories for {config.user}: {len(latest_baseline_repositories)}") - print_memory_summary(all_results, config.memory_summary_limit) - print_phase_memory_summary(all_results, config.memory_summary_limit) - comparisons = compare_variants(all_results) - print_comparison_summary(comparisons) - write_results_files(all_results, comparisons, config, sourcegraph_load_monitor) - raise_for_memory_regressions(comparisons, config) - - -def run_variants(config: EndToEndConfig) -> list[RunVariant]: - """Return the executable variants to measure.""" - candidate_command = config.candidate_command or config.src_auth_perms_sync_command - candidate = RunVariant("candidate", tuple(shlex.split(candidate_command))) - if not candidate.executable: - raise SystemExit("candidate command cannot be empty") - if not config.baseline_command: - return [candidate] - baseline = RunVariant("baseline", tuple(shlex.split(config.baseline_command))) - if not baseline.executable: - raise SystemExit("--baseline-command cannot be empty") - return [baseline, candidate] - - -def create_jaeger_trace_fetch_pool( - config: EndToEndConfig, -) -> JaegerTraceFetchPool | None: - """Return the shared trace fetch pool for this run, if trace collection is enabled.""" - if not config.fetch_sg_traces or config.jaeger_trace_limit == 0: - return None - return JaegerTraceFetchPool( - config, - parallelism=config.jaeger_trace_parallelism, - initial_delay_seconds=config.jaeger_initial_delay_seconds, - retry_delays_seconds=config.jaeger_retry_delays, 
- jsonl_path=jaeger_trace_jsonl_path(config), - trace_directory=jaeger_trace_directory(config), - ) - - -def jaeger_trace_jsonl_path(config: EndToEndConfig) -> Path | None: - """Return where to stream trace summaries for this run.""" - if config.jaeger_trace_jsonl is not None: - return config.jaeger_trace_jsonl - anchor = config.results_json or config.results_csv - if anchor is not None: - return anchor.with_name(f"{anchor.stem}-jaeger-traces.jsonl") - stamp = datetime.datetime.now(datetime.UTC).strftime("%Y%m%d-%H%M%S") - return Path("/tmp") / f"src-auth-perms-sync-end-to-end-jaeger-traces-{stamp}.jsonl" - - -def jaeger_trace_directory(config: EndToEndConfig) -> Path: - """Return the directory where complete raw Jaeger traces should be stored.""" - if config.jaeger_trace_directory is not None: - return config.jaeger_trace_directory - anchor = config.results_json or config.results_csv - if anchor is not None: - return anchor.with_name(f"{anchor.stem}-jaeger-traces") - stamp = datetime.datetime.now(datetime.UTC).strftime("%Y%m%d-%H%M%S") - return Path("/tmp") / f"src-auth-perms-sync-end-to-end-jaeger-traces-{stamp}" - - -def create_sourcegraph_load_monitor(config: EndToEndConfig) -> SourcegraphLoadMonitor | None: - """Return the Sourcegraph load monitor for this run, if enabled.""" - if not config.monitor_sourcegraph_load: - return None - return SourcegraphLoadMonitor(config, sourcegraph_monitor_output_dir(config)) - - -def sourcegraph_monitor_output_dir(config: EndToEndConfig) -> Path: - """Return where Sourcegraph pod/Postgres monitor artifacts should be stored.""" - if config.sourcegraph_monitor_output_dir is not None: - return config.sourcegraph_monitor_output_dir - anchor = config.results_json or config.results_csv - if anchor is not None: - return anchor.with_name(f"{anchor.stem}-sourcegraph-load") - stamp = datetime.datetime.now(datetime.UTC).strftime("%Y%m%d-%H%M%S") - return Path("/tmp") / f"src-auth-perms-sync-end-to-end-sourcegraph-load-{stamp}" - - -def 
sourcegraph_monitor_script_path() -> Path: - """Return the lower-level monitor script used by the e2e orchestrator.""" - return Path(__file__).resolve().with_name("memory-efficiency-monitor-sourcegraph.sh") - - -def complete_jaeger_trace_path(trace_directory: Path, trace_request: dict[str, Any]) -> Path: - """Return the stable per-trace path for a complete Jaeger trace payload.""" - variant = safe_path_part(trace_request.get("variant"), default="variant") - iteration = int_field(trace_request, "iteration") or 0 - case_name = safe_path_part(trace_request.get("case"), default="case") - trace_id = safe_path_part(trace_request.get("trace_id"), default="trace") - return trace_directory / variant / f"iteration-{iteration:04d}" / case_name / f"{trace_id}.json" - - -def safe_path_part(value: object, *, default: str) -> str: - """Return a filesystem-safe path segment for generated trace artifacts.""" - text = str(value) if value is not None else "" - safe_text = SAFE_PATH_PART_PATTERN.sub("-", text).strip("-.") - return safe_text[:120] or default - - -def command_environment(config: EndToEndConfig) -> dict[str, str]: - """Return a deterministic child environment for CLI config parsing.""" - environment = dict(os.environ) - for name in list(environment): - if name.startswith(REMOVED_SRC_AUTH_PERMS_SYNC_ENVIRONMENT_PREFIX): - del environment[name] - environment["SRC_ENDPOINT"] = config.src_endpoint - environment["SRC_ACCESS_TOKEN"] = config.src_access_token - return environment - - -def assert_test_endpoint(endpoint: str) -> None: - """Refuse mutating cases unless the endpoint looks like a test instance.""" - hostname = (urlsplit(endpoint).hostname or "").lower() - if hostname in {"localhost", "127.0.0.1", "::1"}: - return - if hostname.endswith(".sgdev.org") or ".sgdev." in hostname: - return - raise SystemExit( - "Refusing mutating tests against non-test-looking endpoint " - f"{endpoint!r}. Pass --allow-non-test-endpoint if this is intentional." 
- ) - - -def validate_date(value: str, flag_name: str) -> None: - try: - datetime.date.fromisoformat(value) - except ValueError as error: - raise SystemExit(f"{flag_name} must be YYYY-MM-DD, got {value!r}") from error - - -def run_matrix( - config: EndToEndConfig, - runner: CommandPermutationRunner, -) -> set[str]: - for case in invalid_configuration_cases(config): - runner.run(case) - - baseline_result: CommandResult | None = None - for case in read_only_cases(config): - result = runner.run(case) - if case.name == "get-users-baseline": - baseline_result = result - assert baseline_result is not None - baseline_repositories = repositories_for_user(snapshot_path(baseline_result), config.user) - - run_safe_set_cases(config, runner) - run_full_apply_cases(config, runner) - - set_user_dry_run = runner.run(set_user_dry_run_case(config)) - runner.run(restore_scoped_dry_run_case(snapshot_path(set_user_dry_run), config)) - set_user_apply = runner.run(set_user_apply_case(config)) - try: - runner.run(restore_scoped_apply_case(snapshot_path(set_user_apply), config)) - finally: - final_result = runner.run(final_get_user_case(config)) - final_repositories = repositories_for_user(snapshot_path(final_result), config.user) - if final_repositories != baseline_repositories: - added = sorted(final_repositories - baseline_repositories) - removed = sorted(baseline_repositories - final_repositories) - raise CommandPermutationFailure( - f"final user baseline differs after cleanup; added={added}, removed={removed}" - ) - - runner.run(users_without_explicit_permissions_no_op_case(config)) - runner.run(sync_saml_apply_case()) - return baseline_repositories - - -def invalid_configuration_cases(config: EndToEndConfig) -> list[CommandCase]: - restore_placeholder = "definitely-missing-before.json" - missing_maps = "definitely-missing-command-permutation-maps.yaml" - command_pairs: list[tuple[str, tuple[str, ...]]] = [ - ("get-set", ("get", "set")), - ("get-restore", ("get", "restore", 
"--restore-path", restore_placeholder)), - ("set-restore", ("set", "--maps-path", "maps.yaml", "restore")), - ] - cases = [ - CommandCase( - name=f"invalid-multiple-commands-{name}", - arguments=command_arguments, - expected_exit_code=2, - must_contain=("unrecognized arguments",), - ) - for name, command_arguments in command_pairs - ] - cases.append( - CommandCase( - name="invalid-restore-sync-saml-orgs", - arguments=("restore", "--restore-path", restore_placeholder, "--sync-saml-orgs"), - expected_exit_code=2, - must_contain=("unrecognized arguments",), - ) - ) - cases.extend( - [ - CommandCase( - name="invalid-full-without-set", - arguments=("get", "--full"), - expected_exit_code=2, - must_contain=("unrecognized arguments",), - ), - CommandCase( - name="invalid-set-full-and-user", - arguments=("set", "--full", "--users", config.user), - expected_exit_code=2, - must_contain=("choose at most one",), - ), - CommandCase( - name="invalid-set-full-and-users-without-explicit-perms", - arguments=( - "set", - "--full", - "--users-without-explicit-perms", - ), - expected_exit_code=2, - must_contain=("choose at most one",), - ), - CommandCase( - name="invalid-set-full-and-created-after", - arguments=("set", "--full", "--created-after", config.future_date), - expected_exit_code=2, - must_contain=("--full cannot be combined with --created-after",), - ), - CommandCase( - name="invalid-user-filter-conflict", - arguments=("get", "--users", config.user, "--users-without-explicit-perms"), - expected_exit_code=2, - must_contain=("choose only one of --users or --users-without-explicit-perms",), - ), - CommandCase( - name="invalid-restore-user-filter", - arguments=( - "restore", - "--restore-path", - restore_placeholder, - "--users", - config.user, - ), - expected_exit_code=2, - must_contain=("unrecognized arguments",), - ), - CommandCase( - name="invalid-sync-created-after-filter", - arguments=("sync-saml-orgs", "--created-after", config.future_date), - expected_exit_code=2, - 
must_contain=("unrecognized arguments",), - ), - CommandCase( - name="invalid-date-shape", - arguments=("get", "--created-after", "2026-1-01"), - expected_exit_code=2, - ), - CommandCase( - name="invalid-date-value", - arguments=("get", "--created-after", "2026-02-31"), - expected_exit_code=1, - must_contain=("--created-after must use YYYY-MM-DD",), - ), - CommandCase( - name="invalid-missing-set-file", - arguments=("set", "--maps-path", missing_maps), - expected_exit_code=1, - expected_log_command="set_full", - expected_log_status="error", - must_contain=("set input file does not exist",), - ), - CommandCase( - name="invalid-removed-repositories-created-after-flag", - arguments=("get", "--repositories-created-after", config.future_date), - expected_exit_code=2, - must_contain=("unrecognized arguments",), - ), - CommandCase( - name="invalid-removed-get-schema-flag", - arguments=("get", "--get-schema", "definitely-missing-schema.gql"), - expected_exit_code=2, - must_contain=("unrecognized arguments",), - ), - ] - ) - return cases - - -def read_only_cases(config: EndToEndConfig) -> list[CommandCase]: - cases = [ - CommandCase( - name="help", - arguments=("--help",), - must_contain=("usage: src-auth-perms-sync", "commands:"), - must_not_contain=("--repositories-created-after", "--get-schema"), - ), - CommandCase( - name="get-users-baseline", - arguments=("get", "--users", config.user), - expected_log_command="get", - must_contain=("Wrote before-snapshot",), - ), - CommandCase( - name="get-created-after-future", - arguments=("get", "--created-after", config.future_date), - expected_log_command="get", - must_contain=("Selected 0 user(s) for get output",), - ), - CommandCase( - name="get-user-created-after-future", - arguments=("get", "--users", config.user, "--created-after", config.future_date), - expected_log_command="get", - must_contain_one_of=( - "Selected 0 user(s) for get output", - "Wrote before-snapshot", - ), - ), - CommandCase( - 
name="get-users-without-explicit-perms-created-after-future", - arguments=( - "get", - "--users-without-explicit-perms", - "--created-after", - config.future_date, - ), - expected_log_command="get", - must_contain=("Selected 0 user(s) for get output",), - ), - ] - return cases - - -def run_safe_set_cases(config: EndToEndConfig, runner: CommandPermutationRunner) -> None: - runner.run( - CommandCase( - name="set-created-after-no-op-apply", - arguments=( - "set", - "--created-after", - config.future_date, - "--apply", - "--no-backup", - "--parallelism", - str(config.parallelism), - ), - expected_log_command="set_created_after", - must_contain=("No users selected",), - ) - ) - - -def set_user_dry_run_case(config: EndToEndConfig) -> CommandCase: - return CommandCase( - name="set-user-dry-run", - arguments=("set", "--users", config.user), - expected_log_command="set_users", - must_contain=("Dry run complete",), - ) - - -def set_user_apply_case(config: EndToEndConfig) -> CommandCase: - return CommandCase( - name="set-user-apply", - arguments=( - "set", - "--users", - config.user, - "--apply", - "--parallelism", - str(config.parallelism), - ), - expected_log_command="set_users", - must_contain_one_of=( - "VALIDATION OK: all", - "All selected users already have the mapped explicit grants", - ), - ) - - -def users_without_explicit_permissions_no_op_case(config: EndToEndConfig) -> CommandCase: - return CommandCase( - name="set-users-without-explicit-perms-no-op-apply", - arguments=( - "set", - "--users-without-explicit-perms", - "--created-after", - config.future_date, - "--apply", - "--no-backup", - "--parallelism", - str(config.parallelism), - ), - expected_log_command="set_users_without_explicit_perms", - must_contain=("No users selected",), - ) - - -def restore_scoped_dry_run_case(snapshot: Path, config: EndToEndConfig) -> CommandCase: - return CommandCase( - name="restore-scoped-dry-run", - arguments=( - "restore", - "--restore-path", - str(snapshot), - "--parallelism", 
- str(config.parallelism), - ), - expected_log_command="restore", - must_contain=("Dry run complete",), - ) - - -def restore_scoped_apply_case(snapshot: Path, config: EndToEndConfig) -> CommandCase: - return CommandCase( - name="restore-scoped-apply-cleanup", - arguments=( - "restore", - "--restore-path", - str(snapshot), - "--apply", - "--parallelism", - str(config.parallelism), - ), - expected_log_command="restore", - must_contain_one_of=( - "VALIDATION OK: scoped restore matches the target snapshot", - "Scoped restore target already matches current state", - ), - ) - - -def sync_saml_apply_case() -> CommandCase: - return CommandCase( - name="sync-saml-orgs-apply", - arguments=("sync-saml-orgs", "--apply"), - expected_log_command="sync_saml_orgs", - must_contain=("VALIDATION OK: all target org memberships match",), - ) - - -def final_get_user_case(config: EndToEndConfig) -> CommandCase: - return CommandCase( - name="final-get-user-baseline-check", - arguments=("get", "--users", config.user), - expected_log_command="get", - must_contain=("Wrote before-snapshot",), - ) - - -def run_full_apply_cases(config: EndToEndConfig, runner: CommandPermutationRunner) -> None: - dry_run_result = runner.run( - CommandCase( - name="set-full-dry-run", - arguments=("set",), - expected_log_command="set_full", - must_contain=("Dry run complete",), - ) - ) - baseline_snapshot = snapshot_path(dry_run_result) - - if config.include_redundant_scale_cases: - try: - runner.run( - CommandCase( - name="set-full-apply", - arguments=( - "set", - "--apply", - "--parallelism", - str(config.parallelism), - ), - expected_log_command="set_full", - must_contain=("VALIDATION OK",), - ) - ) - finally: - runner.run( - restore_full_apply_case( - "restore-full-apply-cleanup", - baseline_snapshot, - config, - no_backup=False, - ) - ) - - try: - runner.run( - CommandCase( - name="set-full-no-backup-apply", - arguments=( - "set", - "--apply", - "--no-backup", - "--parallelism", - str(config.parallelism), - 
), - expected_log_command="set_full", - must_contain=("Apply done",), - ) - ) - finally: - runner.run( - restore_full_apply_case( - "restore-full-no-backup-cleanup", - baseline_snapshot, - config, - no_backup=True, - ) - ) - - # Covers combined set+SAML dispatch and SAML dry-run with a user-scoped - # set path, so the default suite keeps only one expensive full-snapshot - # case. Pass --include-redundant-scale-cases to restore older overlap. - runner.run( - CommandCase( - name="set-user-sync-saml-orgs-dry-run", - arguments=( - "set", - "--users", - config.user, - "--sync-saml-orgs", - ), - expected_log_command="set_users_sync_saml_orgs", - must_contain=("Dry run complete",), - ) - ) - - -def restore_full_apply_case( - name: str, - snapshot: Path, - config: EndToEndConfig, - *, - no_backup: bool, -) -> CommandCase: - restore_arguments = [ - "restore", - "--restore-path", - str(snapshot), - "--apply", - "--parallelism", - str(config.full_restore_parallelism), - ] - if no_backup: - restore_arguments.append("--no-backup") - return CommandCase( - name=name, - arguments=tuple(restore_arguments), - expected_log_command="restore", - must_contain_one_of=( - "VALIDATION OK: post-restore state matches", - "Restore done", - "Nothing to restore", - ), - ) - - -def _extract_log_path(output: str) -> Path | None: - matches = LOG_PATH_PATTERN.findall(output) - if not matches: - return None - return Path(matches[-1]) - - -def _read_run_log_summary( - log_path: Path, -) -> tuple[ - dict[str, Any] | None, - MemorySummary | None, - list[PhaseMemorySummary], - dict[str, int | float | str], -]: - if not log_path.is_file(): - raise CommandPermutationFailure(f"structured log file does not exist: {log_path}") - run_record: dict[str, Any] | None = None - sample_count = 0 - sampled_peak_rss_mb: float | None = None - max_num_fds: int | None = None - max_num_threads: int | None = None - max_process_cpu_percent: float | None = None - records: list[dict[str, Any]] = [] - with 
log_path.open(encoding="utf-8") as log_file: - for line in log_file: - if not line.strip(): - continue - record = json.loads(line) - records.append(record) - if record.get("event") == "resource_sample": - sample_count += 1 - sampled_peak_rss_mb = max_optional_float( - sampled_peak_rss_mb, - float_field(record, "peak_rss_mb", "rss_mb", "process_rss_mb"), - ) - max_num_fds = max_optional_int(max_num_fds, int_field(record, "num_fds")) - max_num_threads = max_optional_int( - max_num_threads, int_field(record, "num_threads") - ) - max_process_cpu_percent = max_optional_float( - max_process_cpu_percent, - float_field(record, "process_cpu_percent", "cpu_percent"), - ) - if record.get("event") == "run" and record.get("phase") == "end": - run_record = record - if run_record is None: - return None, None, phase_memory_from_records(records), workload_from_records(records) - memory = MemorySummary( - peak_rss_mb=float_field(run_record, "peak_rss_mb"), - sampled_peak_rss_mb=sampled_peak_rss_mb, - external_peak_rss_mb=None, - resource_sample_count=sample_count, - external_sample_count=0, - max_num_fds=max_optional_int(max_num_fds, int_field(run_record, "num_fds")), - max_num_threads=max_optional_int(max_num_threads, int_field(run_record, "num_threads")), - max_process_cpu_percent=max_process_cpu_percent, - ) - return run_record, memory, phase_memory_from_records(records), workload_from_records(records) - - -def phase_memory_from_records(records: list[dict[str, Any]]) -> list[PhaseMemorySummary]: - """Attribute resource samples to every active structured span.""" - spans = span_intervals_from_records(records) - if not spans: - return [] - duration_by_phase: dict[tuple[str, str | None], int] = {} - for span in spans: - key = (span.event, span.stage) - duration_by_phase[key] = duration_by_phase.get(key, 0) + span.duration_ms - phase_stats: dict[tuple[str, str | None], dict[str, int | float]] = {} - for record in records: - if record.get("event") != "resource_sample": - continue - 
timestamp = parse_log_timestamp(record.get("ts")) - rss_mb = float_field(record, "peak_rss_mb", "rss_mb", "process_rss_mb") - if timestamp is None or rss_mb is None: - continue - active_spans = [span for span in spans if span.started_at <= timestamp <= span.ended_at] - if not active_spans: - continue - for active_span in active_spans: - key = (active_span.event, active_span.stage) - stats = phase_stats.setdefault( - key, - {"peak_rss_mb": 0.0, "sample_count": 0}, - ) - stats["peak_rss_mb"] = max(float(stats["peak_rss_mb"]), rss_mb) - stats["sample_count"] = int(stats["sample_count"]) + 1 - phase_memory = [ - PhaseMemorySummary( - event=event, - stage=stage, - peak_rss_mb=float(stats["peak_rss_mb"]), - sample_count=int(stats["sample_count"]), - total_duration_ms=duration_by_phase.get((event, stage), 0), - ) - for (event, stage), stats in phase_stats.items() - ] - phase_memory.sort(key=phase_memory_sort_key) - return phase_memory - - -def phase_memory_sort_key(phase: PhaseMemorySummary) -> tuple[bool, float, str, str]: - return (phase.event == "run", -phase.peak_rss_mb, phase.stage or "", phase.event) - - -def span_intervals_from_records(records: list[dict[str, Any]]) -> list[SpanInterval]: - starts_by_span: dict[str, dict[str, Any]] = {} - spans: list[SpanInterval] = [] - run_start_record: dict[str, Any] | None = None - run_end_record: dict[str, Any] | None = None - for record in records: - if record.get("event") == "run": - if record.get("phase") == "start": - run_start_record = record - elif record.get("phase") == "end": - run_end_record = record - span = record.get("span") - if not isinstance(span, str): - continue - phase = record.get("phase") - if phase == "start": - starts_by_span[span] = record - continue - if phase != "end": - continue - ended_at = parse_log_timestamp(record.get("ts")) - if ended_at is None: - continue - duration_ms = int_field(record, "duration_ms") or 0 - start_record = starts_by_span.get(span) - started_at = 
parse_log_timestamp(start_record.get("ts")) if start_record else None - if started_at is None: - started_at = ended_at - datetime.timedelta(milliseconds=duration_ms) - event = record.get("event") - if not isinstance(event, str): - continue - stage = record.get("stage") - if not isinstance(stage, str): - stage = None - spans.append( - SpanInterval( - event=event, - stage=stage, - started_at=started_at, - ended_at=ended_at, - duration_ms=duration_ms, - ) - ) - run_span = run_span_interval(run_start_record, run_end_record) - if run_span is not None: - spans.append(run_span) - return spans - - -def run_span_interval( - start_record: dict[str, Any] | None, end_record: dict[str, Any] | None -) -> SpanInterval | None: - if end_record is None: - return None - ended_at = parse_log_timestamp(end_record.get("ts")) - if ended_at is None: - return None - duration_ms = int_field(end_record, "duration_ms") or 0 - started_at = parse_log_timestamp(start_record.get("ts")) if start_record else None - if started_at is None: - started_at = ended_at - datetime.timedelta(milliseconds=duration_ms) - return SpanInterval( - event="run", - stage=None, - started_at=started_at, - ended_at=ended_at, - duration_ms=duration_ms, - ) - - -def parse_log_timestamp(value: object) -> datetime.datetime | None: - if not isinstance(value, str): - return None - try: - return datetime.datetime.fromisoformat(value.replace("Z", "+00:00")) - except ValueError: - return None - - -def workload_from_records(records: list[dict[str, Any]]) -> dict[str, int | float | str]: - """Collect named workload dimensions from structured log records. - - Earlier e2e summaries used raw field names from unrelated events, which made - values like `total_users` and `repo_count` ambiguous. Keep this summary - event-aware so each key says what it counts. 
- """ - workload: dict[str, int | float | str] = {} - for record in records: - event_name = optional_string(record.get("event")) - phase = optional_string(record.get("phase")) - if event_name == "capture_explicit_grants": - record_workload_max(workload, "sourcegraph_user_count", record.get("total_users")) - if phase == "end": - record_workload_max(workload, "captured_user_count", record.get("user_count")) - elif event_name in {"build_snapshot", "build_user_scoped_snapshot"} and phase == "end": - record_workload_max(workload, "snapshot_user_count_max", record.get("user_count")) - record_workload_max( - workload, - "snapshot_repos_with_explicit_grants_max", - record.get("repos_with_explicit_grants"), - ) - record_workload_max(workload, "snapshot_total_grants_max", record.get("total_grants")) - record_workload_max(workload, "captured_user_count", record.get("user_count")) - elif event_name == "user_explicit_repos_batch_fetch" and phase == "end": - record_workload_max(workload, "batch_user_count_max", record.get("user_count")) - record_workload_max( - workload, - "batch_fetched_grant_count_max", - record.get("fetched_grant_count") - if "fetched_grant_count" in record - else record.get("repo_count"), - ) - elif event_name == "load_repos_by_external_service" and phase == "end": - record_workload_max(workload, "loaded_repo_count", record.get("repo_count")) - record_workload_max( - workload, - "expected_repo_count", - record.get("expected_repo_count"), - ) - elif event_name == "apply_username_overwrites": - record_workload_max(workload, "apply_payload_count", record.get("payload_count")) - record_workload_max( - workload, - "apply_payload_grant_count", - record.get("payload_grant_count") - if "payload_grant_count" in record - else record.get("total_users"), - ) - record_workload_max(workload, "parallelism", record.get("parallelism")) - if phase == "end": - record_workload_max( - workload, - "apply_mutations_succeeded", - record.get("succeeded"), - ) - 
record_workload_max(workload, "apply_mutations_failed", record.get("failed")) - record_workload_max(workload, "apply_mutations_canceled", record.get("canceled")) - elif ( - event_name - in { - "cmd_get", - "cmd_restore", - "cmd_restore_user_scoped", - "cmd_set", - "cmd_set_additive_user", - "cmd_set_additive_users_without_explicit_perms", - } - and phase == "end" - ): - record_command_workload(workload, record) - elif event_name in {"sync_saml_orgs", "cmd_sync_saml_orgs"} and phase == "end": - record_workload_max( - workload, - "target_organizations", - record.get("target_organizations"), - ) - record_workload_max(workload, "desired_memberships", record.get("desired_memberships")) - - record_workload_model_dimensions(workload) - return workload - - -def record_command_workload(workload: dict[str, int | float | str], record: dict[str, Any]) -> None: - """Copy command-level counts using names that preserve their meaning.""" - event_name = optional_string(record.get("event")) - repo_count = record.get("repo_count") - total_grants = record.get("total_grants") - if event_name == "cmd_set": - record_workload_max(workload, "planned_repo_count", repo_count) - record_workload_max(workload, "planned_total_grants", total_grants) - elif event_name == "cmd_get": - record_workload_max(workload, "selected_user_count", record.get("user_count")) - record_workload_max(workload, "selected_total_grants", total_grants) - elif event_name == "cmd_restore": - record_workload_max(workload, "restore_snapshot_repo_count", record.get("snapshot_repos")) - record_workload_max( - workload, - "restore_snapshot_total_grants", - record.get("snapshot_grants"), - ) - elif event_name == "cmd_set_additive_user": - record_workload_max(workload, "selected_user_count", record.get("user_count")) - record_workload_max(workload, "planned_repo_count", repo_count) - record_workload_max(workload, "planned_total_grants", total_grants) - - record_workload_max(workload, "mapping_count", 
record.get("mapping_count")) - record_workload_max(workload, "mutations_succeeded", record.get("mutations_succeeded")) - record_workload_max(workload, "mutations_failed", record.get("mutations_failed")) - record_workload_max(workload, "mutations_canceled", record.get("mutations_canceled")) - - -def record_workload_model_dimensions(workload: dict[str, int | float | str]) -> None: - """Add the canonical dimensions used by memory modeling.""" - user_count = max_workload_number( - workload, - ( - "selected_user_count", - "captured_user_count", - "snapshot_user_count_max", - "sourcegraph_user_count", - ), - ) - repo_count = max_workload_number( - workload, - ( - "planned_repo_count", - "restore_snapshot_repo_count", - "snapshot_repos_with_explicit_grants_max", - "loaded_repo_count", - ), - ) - grant_count = max_workload_number( - workload, - ( - "planned_total_grants", - "restore_snapshot_total_grants", - "selected_total_grants", - "snapshot_total_grants_max", - "apply_payload_grant_count", - ), - ) - if user_count is not None: - workload["memory_model_user_count"] = user_count - if repo_count is not None: - workload["memory_model_repo_count"] = repo_count - if grant_count is not None: - workload["memory_model_grant_count"] = grant_count - - -def max_workload_number( - workload: dict[str, int | float | str], field_names: Sequence[str] -) -> int | float | None: - """Return the largest numeric value found for the supplied workload fields.""" - values = [ - value - for field_name in field_names - if isinstance((value := workload.get(field_name)), int | float) - ] - return max(values) if values else None - - -def record_workload_max( - workload: dict[str, int | float | str], field_name: str, value: object -) -> None: - """Record the maximum numeric value for a named workload dimension.""" - if isinstance(value, bool) or not isinstance(value, int | float): - return - old_value = workload.get(field_name) - if not isinstance(old_value, int | float) or value > old_value: - 
workload[field_name] = value - - -def artifact_sizes_for_run(log_path: Path) -> dict[str, int]: - """Return sizes of JSON artifacts in the same run directory as the log.""" - run_directory = log_path.parent - sizes = { - path.name: path.stat().st_size - for path in sorted(run_directory.glob("*.json")) - if path.is_file() - } - sizes["__total_json_bytes"] = sum(sizes.values()) - return sizes - - -def wait_for_jaeger_trace_collectors(collectors: list[JaegerTraceCollector]) -> None: - if not collectors: - return - print(f"\nWaiting for {len(collectors)} background Jaeger trace collector(s) ...") - for collector in collectors: - collector.wait() - - -def graphql_query_metadata_from_record(record: dict[str, Any]) -> dict[str, Any] | None: - """Return correlation metadata from a structured `graphql_query` log record.""" - if record.get("event") != "graphql_query": - return None - metadata: dict[str, Any] = { - "span_id": record.get("span"), - "parent_span_id": record.get("parent_span"), - "trace_id": record.get("trace"), - } - phase = record.get("phase") - if phase == "start": - metadata["started_at"] = record.get("ts") - elif phase == "end": - metadata["ended_at"] = record.get("ts") - for field_name in ( - "cursor_present", - "duration_ms", - "error_type", - "graphql_client", - "page_number", - "page_size", - "query_bytes", - "query_name", - "response_fields", - "status", - "url", - "variable_names", - # Current src-py-lib logs variable names only. Keep these optional fields - # so raw trace artifacts automatically include values if the GraphQL log - # event grows an opt-in sanitized-variable field later. 
- "input_variables", - "variable_values", - "variables", - ): - if field_name in record: - metadata[field_name] = record[field_name] - return {key: value for key, value in metadata.items() if value is not None} - - -def graphql_query_span_key(record: dict[str, Any]) -> tuple[str, str] | None: - """Return the `(trace_id, span_id)` key for a GraphQL query log span.""" - trace_id = optional_string(record.get("trace")) - span_id = optional_string(record.get("span")) - if trace_id is None or span_id is None: - return None - return trace_id, span_id - - -def graphql_trace_request_from_record(record: dict[str, Any]) -> dict[str, Any] | None: - if record.get("event") != "http_request" or record.get("phase") != "end": - return None - if not str(record.get("url", "")).endswith("/.api/graphql"): - return None - trace = sourcegraph_trace_from_record(record) - if trace is None: - return None - return trace.to_json() | { - "duration_ms": float_field(record, "duration_ms") or 0.0, - "timestamp": record.get("ts"), - "status": record.get("status"), - "status_code": record.get("status_code"), - "error_type": record.get("error_type"), - } - - -def trace_summary_duration_ms(summary: dict[str, Any]) -> float: - duration_ms = summary.get("duration_ms") - return float(duration_ms) if isinstance(duration_ms, int | float) else 0.0 - - -def sourcegraph_trace_from_record(record: dict[str, Any]) -> src.SourcegraphTrace | None: - request_headers = string_headers(record.get("request_headers")) - response_headers = string_headers(record.get("response_headers")) - trace = sourcegraph_trace_from_headers(response_headers, request_headers) - if trace is not None: - return trace - trace_id = trace_id_from_traceparent(header_value(request_headers, "traceparent")) - if trace_id is None: - return None - return src.SourcegraphTrace( - trace_id=trace_id, - trace_url=header_value(response_headers, "x-trace-url"), - ) - - -def sourcegraph_trace_from_request(trace_request: dict[str, Any]) -> 
src.SourcegraphTrace: - return src.SourcegraphTrace( - trace_id=str(trace_request["trace_id"]), - span_id=optional_string(trace_request.get("span_id")), - trace_url=optional_string(trace_request.get("trace_url")), - parent_trace_id=optional_string(trace_request.get("parent_trace_id")), - parent_span_id=optional_string(trace_request.get("parent_span_id")), - ) - - -def trace_id_from_traceparent(traceparent: str | None) -> str | None: - if traceparent is None: - return None - parts = traceparent.split("-") - if len(parts) != 4: - return None - trace_id = parts[1] - if len(trace_id) != 32 or not all(character in "0123456789abcdef" for character in trace_id): - return None - return trace_id - - -def string_headers(headers: object) -> dict[str, str]: - if not isinstance(headers, dict): - return {} - values: dict[str, str] = {} - typed_headers = cast(dict[object, object], headers) - for header_name, value in typed_headers.items(): - if not isinstance(header_name, str): - continue - if isinstance(value, str): - values[header_name] = value - elif isinstance(value, list): - value_items = cast(list[object], value) - string_values = [item for item in value_items if isinstance(item, str)] - if string_values: - values[header_name] = string_values[0] - return values - - -def header_value(headers: Mapping[str, str], name: str) -> str | None: - lower_name = name.lower() - for header_name, value in headers.items(): - if header_name.lower() == lower_name: - return value - return None - - -def optional_string(value: object) -> str | None: - return value if isinstance(value, str) else None - - -def print_jaeger_trace_summaries(summaries: list[dict[str, Any]]) -> None: - found = sum(1 for summary in summaries if summary.get("jaeger_found") is True) - print(f"Jaeger trace summaries: fetched {found} / {len(summaries)}.") - for summary in summaries: - duration_ms = float(summary.get("duration_ms") or 0) - trace_id = summary.get("trace_id") - if summary.get("jaeger_found") is not True: - 
print(f" {duration_ms:.0f}ms {trace_id}: {summary.get('error')}") - continue - hot_text = format_hot_operations(summary.get("hot_operations")) - print( - f" {duration_ms:.0f}ms {trace_id}: {summary.get('span_count', 0)} span(s); {hot_text}" - ) - - -def format_hot_operations(value: object) -> str: - if not isinstance(value, list): - return "" - return "; ".join( - format_hot_operation(cast(dict[object, object], operation)) - for operation in cast(list[object], value)[:3] - if isinstance(operation, dict) - ) - - -def format_hot_operation(operation: dict[object, object]) -> str: - return ( - f"{operation.get('operation')} x{operation.get('count')} " - f"sum={operation.get('sum_ms')}ms max={operation.get('max_ms')}ms" - ) - - -def process_tree_rss_mb(root_process_identifier: int) -> float | None: - """Return current RSS for the process and descendants, in MiB.""" - try: - process_result = subprocess.run( - ["ps", "-axo", "pid=,ppid=,rss="], - capture_output=True, - text=True, - timeout=2, - check=False, - ) - except (OSError, subprocess.SubprocessError): - return None - if process_result.returncode != 0: - return None - parent_by_process: dict[int, int] = {} - resident_kib_by_process: dict[int, int] = {} - for raw_line in process_result.stdout.splitlines(): - fields = raw_line.split() - if len(fields) != 3: - continue - try: - process_identifier = int(fields[0]) - parent_process_identifier = int(fields[1]) - resident_kib = int(fields[2]) - except ValueError: - continue - parent_by_process[process_identifier] = parent_process_identifier - resident_kib_by_process[process_identifier] = resident_kib - if root_process_identifier not in resident_kib_by_process: - return None - descendants = {root_process_identifier} - changed = True - while changed: - changed = False - for process_identifier, parent_process_identifier in parent_by_process.items(): - if parent_process_identifier in descendants and process_identifier not in descendants: - descendants.add(process_identifier) - 
changed = True - total_resident_kib = sum( - resident_kib_by_process[process_identifier] - for process_identifier in descendants - if process_identifier in resident_kib_by_process - ) - return total_resident_kib / 1024.0 - - -def float_field(record: dict[str, Any], *names: str) -> float | None: - for name in names: - value = record.get(name) - if isinstance(value, int | float): - return float(value) - return None - - -def int_field(record: dict[str, Any], name: str) -> int | None: - value = record.get(name) - if isinstance(value, int): - return value - return None - - -def max_optional_float(current: float | None, new: float | None) -> float | None: - if new is None: - return current - if current is None: - return new - return max(current, new) - - -def max_optional_int(current: int | None, new: int | None) -> int | None: - if new is None: - return current - if current is None: - return new - return max(current, new) - - -def _memory_suffix(memory: MemorySummary | None) -> str: - if memory is None or memory.peak_rss_mb is None: - return "" - return f", peak RSS {memory.peak_rss_mb:.1f} MiB" - - -def print_memory_summary(results: list[CommandResult], limit: int) -> None: - rows = [result for result in results if result.memory and result.memory.peak_rss_mb is not None] - if not rows: - print("\nMemory summary: no structured peak_rss_mb records found.") - return - rows.sort(key=lambda result: result_peak_rss_mb(result) or 0.0, reverse=True) - print("\nMemory summary (highest peak RSS first):") - print( - "variant,iteration,case,peak_rss_mib,sampled_peak_rss_mib," - "external_peak_rss_mib,resource_samples,external_samples,max_fds," - "max_threads,artifact_json_bytes,seconds" - ) - for result in rows[:limit]: - assert result.memory is not None - sampled_peak = format_optional_float(result.memory.sampled_peak_rss_mb) - external_peak = format_optional_float(result.memory.external_peak_rss_mb) - max_fds = format_optional_int(result.memory.max_num_fds) - max_threads = 
format_optional_int(result.memory.max_num_threads) - artifact_bytes = result.artifact_sizes.get("__total_json_bytes", 0) - print( - ",".join( - [ - result.variant, - str(result.iteration), - result.case.name, - format_optional_float(result.memory.peak_rss_mb), - sampled_peak, - external_peak, - str(result.memory.resource_sample_count), - str(result.memory.external_sample_count), - max_fds, - max_threads, - str(artifact_bytes), - f"{result.elapsed_seconds:.1f}", - ] - ) - ) - - -def print_phase_memory_summary(results: list[CommandResult], limit: int) -> None: - rows = [ - (result, phase) - for result in results - for phase in result.phase_memory - if phase.sample_count > 0 - ] - if not rows: - print("\nPhase memory summary: no attributed resource samples found.") - return - rows.sort(key=lambda row: phase_memory_sort_key(row[1])) - print("\nPhase memory summary (highest attributed RSS first):") - print("variant,iteration,case,stage,event,peak_rss_mib,samples,duration_seconds") - for result, phase in rows[:limit]: - print( - ",".join( - [ - result.variant, - str(result.iteration), - result.case.name, - phase.stage or "", - phase.event, - f"{phase.peak_rss_mb:.2f}", - str(phase.sample_count), - f"{phase.total_duration_ms / 1000.0:.1f}", - ] - ) - ) - - -def compare_variants(results: list[CommandResult]) -> list[CaseComparison]: - variants = {result.variant for result in results} - if not {"baseline", "candidate"}.issubset(variants): - return [] - comparisons: list[CaseComparison] = [] - case_names = sorted({result.case.name for result in results}) - for case_name in case_names: - baseline_results = [ - result - for result in results - if result.variant == "baseline" and result.case.name == case_name - ] - candidate_results = [ - result - for result in results - if result.variant == "candidate" and result.case.name == case_name - ] - baseline_peak = median_optional(result_peak_rss_mb(result) for result in baseline_results) - candidate_peak = 
median_optional(result_peak_rss_mb(result) for result in candidate_results) - baseline_external_peak = median_optional( - result_external_peak_rss_mb(result) for result in baseline_results - ) - candidate_external_peak = median_optional( - result_external_peak_rss_mb(result) for result in candidate_results - ) - baseline_elapsed = median_optional(result.elapsed_seconds for result in baseline_results) - candidate_elapsed = median_optional(result.elapsed_seconds for result in candidate_results) - comparisons.append( - CaseComparison( - case_name=case_name, - baseline_count=len(baseline_results), - candidate_count=len(candidate_results), - baseline_peak_rss_mb=baseline_peak, - candidate_peak_rss_mb=candidate_peak, - peak_rss_delta_mb=delta(candidate_peak, baseline_peak), - peak_rss_delta_percent=percent_delta(candidate_peak, baseline_peak), - baseline_external_peak_rss_mb=baseline_external_peak, - candidate_external_peak_rss_mb=candidate_external_peak, - external_peak_rss_delta_mb=delta(candidate_external_peak, baseline_external_peak), - external_peak_rss_delta_percent=percent_delta( - candidate_external_peak, baseline_external_peak - ), - baseline_elapsed_seconds=baseline_elapsed, - candidate_elapsed_seconds=candidate_elapsed, - elapsed_delta_seconds=delta(candidate_elapsed, baseline_elapsed), - elapsed_delta_percent=percent_delta(candidate_elapsed, baseline_elapsed), - ) - ) - comparisons.sort( - key=lambda comparison: ( - comparison.peak_rss_delta_mb - if comparison.peak_rss_delta_mb is not None - else float("-inf") - ), - reverse=True, - ) - return comparisons - - -def print_comparison_summary(comparisons: list[CaseComparison]) -> None: - if not comparisons: - return - print("\nCandidate vs baseline median comparison:") - print( - "case,baseline_peak_rss_mib,candidate_peak_rss_mib,delta_mib,delta_percent," - "baseline_seconds,candidate_seconds,seconds_delta_percent" - ) - for comparison in comparisons: - print( - ",".join( - [ - comparison.case_name, - 
format_optional_float(comparison.baseline_peak_rss_mb), - format_optional_float(comparison.candidate_peak_rss_mb), - format_signed_optional_float(comparison.peak_rss_delta_mb), - format_signed_optional_float(comparison.peak_rss_delta_percent), - format_optional_float(comparison.baseline_elapsed_seconds), - format_optional_float(comparison.candidate_elapsed_seconds), - format_signed_optional_float(comparison.elapsed_delta_percent), - ] - ) - ) - - -def write_results_files( - results: list[CommandResult], - comparisons: list[CaseComparison], - config: EndToEndConfig, - sourcegraph_load_monitor: SourcegraphLoadMonitor | None, -) -> None: - if config.results_json is not None: - write_results_json(config.results_json, results, comparisons, sourcegraph_load_monitor) - if config.results_csv is not None: - write_results_csv(config.results_csv, results) - phase_csv = phase_results_csv_path(config.results_csv) - write_phase_results_csv(phase_csv, results) - - -def write_results_json( - path: Path, - results: list[CommandResult], - comparisons: list[CaseComparison], - sourcegraph_load_monitor: SourcegraphLoadMonitor | None, -) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - sourcegraph_monitor: dict[str, Any] | None = None - if sourcegraph_load_monitor is not None: - sourcegraph_monitor = { - "output_dir": str(sourcegraph_load_monitor.output_dir), - "log_path": str(sourcegraph_load_monitor.log_path), - } - with path.open("w", encoding="utf-8") as output_file: - json.dump( - { - "generated_at": datetime.datetime.now(datetime.UTC).isoformat(), - "sourcegraph_load_monitor": sourcegraph_monitor, - "results": [result_to_json(result) for result in results], - "comparisons": [comparison_to_json(comparison) for comparison in comparisons], - }, - output_file, - indent=2, - sort_keys=True, - ) - output_file.write("\n") - print(f"Wrote JSON results to {path}") - - -def write_results_csv(path: Path, results: list[CommandResult]) -> None: - path.parent.mkdir(parents=True, 
exist_ok=True) - workload_fields = sorted({field_name for result in results for field_name in result.workload}) - artifact_fields = sorted( - {field_name for result in results for field_name in result.artifact_sizes} - ) - fieldnames = [ - "variant", - "iteration", - "case", - "return_code", - "elapsed_seconds", - "peak_rss_mb", - "sampled_peak_rss_mb", - "external_peak_rss_mb", - "resource_sample_count", - "external_sample_count", - "max_num_fds", - "max_num_threads", - "max_process_cpu_percent", - "jaeger_trace_count", - "jaeger_trace_found_count", - "jaeger_trace_error_count", - "slowest_graphql_trace_ms", - "slowest_graphql_trace_id", - *[f"artifact_{field_name}" for field_name in artifact_fields], - *[f"workload_{field_name}" for field_name in workload_fields], - ] - with path.open("w", encoding="utf-8", newline="") as output_file: - writer = csv.DictWriter(output_file, fieldnames=fieldnames) - writer.writeheader() - for result in results: - writer.writerow(result_to_csv_row(result, artifact_fields, workload_fields)) - print(f"Wrote CSV results to {path}") - - -def write_phase_results_csv(path: Path, results: list[CommandResult]) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - fieldnames = [ - "variant", - "iteration", - "case", - "stage", - "event", - "peak_rss_mb", - "sample_count", - "total_duration_ms", - ] - with path.open("w", encoding="utf-8", newline="") as output_file: - writer = csv.DictWriter(output_file, fieldnames=fieldnames) - writer.writeheader() - for result in results: - for phase in result.phase_memory: - writer.writerow( - { - "variant": result.variant, - "iteration": result.iteration, - "case": result.case.name, - "stage": phase.stage or "", - "event": phase.event, - "peak_rss_mb": f"{phase.peak_rss_mb:.2f}", - "sample_count": phase.sample_count, - "total_duration_ms": phase.total_duration_ms, - } - ) - print(f"Wrote phase CSV results to {path}") - - -def phase_results_csv_path(path: Path) -> Path: - return 
path.with_name(f"{path.stem}-phases{path.suffix}") - - -def raise_for_memory_regressions(comparisons: list[CaseComparison], config: EndToEndConfig) -> None: - percent_limit = config.fail_on_memory_regression_percent - mib_limit = config.fail_on_memory_regression_mib - if percent_limit is None and mib_limit is None: - return - failures: list[str] = [] - for comparison in comparisons: - if ( - mib_limit is not None - and comparison.peak_rss_delta_mb is not None - and comparison.peak_rss_delta_mb > mib_limit - ): - failures.append( - f"{comparison.case_name}: peak RSS regressed " - f"{comparison.peak_rss_delta_mb:.2f} MiB > {mib_limit:.2f} MiB" - ) - if ( - percent_limit is not None - and comparison.peak_rss_delta_percent is not None - and comparison.peak_rss_delta_percent > percent_limit - ): - failures.append( - f"{comparison.case_name}: peak RSS regressed " - f"{comparison.peak_rss_delta_percent:.2f}% > {percent_limit:.2f}%" - ) - if failures: - raise SystemExit("Memory regression threshold exceeded: " + "; ".join(failures)) - - -def result_to_json(result: CommandResult) -> dict[str, Any]: - return { - "variant": result.variant, - "iteration": result.iteration, - "case": result.case.name, - "arguments": list(result.case.arguments), - "return_code": result.return_code, - "elapsed_seconds": round(result.elapsed_seconds, 3), - "log_path": str(result.log_path) if result.log_path is not None else None, - "run_directory": str(result.run_directory) if result.run_directory is not None else None, - "command": result.run_record.get("command") if result.run_record else None, - "status": result.run_record.get("status") if result.run_record else None, - "jaeger_traces": result.jaeger_traces, - "memory": memory_to_json(result.memory), - "phase_memory": [phase_to_json(phase) for phase in result.phase_memory], - "artifact_sizes": result.artifact_sizes, - "workload": result.workload, - "normalized_memory": normalized_memory(result), - } - - -def memory_to_json(memory: MemorySummary 
| None) -> dict[str, Any] | None: - if memory is None: - return None - return { - "peak_rss_mb": memory.peak_rss_mb, - "sampled_peak_rss_mb": memory.sampled_peak_rss_mb, - "external_peak_rss_mb": memory.external_peak_rss_mb, - "resource_sample_count": memory.resource_sample_count, - "external_sample_count": memory.external_sample_count, - "max_num_fds": memory.max_num_fds, - "max_num_threads": memory.max_num_threads, - "max_process_cpu_percent": memory.max_process_cpu_percent, - } - - -def phase_to_json(phase: PhaseMemorySummary) -> dict[str, Any]: - return { - "event": phase.event, - "stage": phase.stage, - "peak_rss_mb": phase.peak_rss_mb, - "sample_count": phase.sample_count, - "total_duration_ms": phase.total_duration_ms, - } - - -def comparison_to_json(comparison: CaseComparison) -> dict[str, Any]: - return { - "case": comparison.case_name, - "baseline_count": comparison.baseline_count, - "candidate_count": comparison.candidate_count, - "baseline_peak_rss_mb": comparison.baseline_peak_rss_mb, - "candidate_peak_rss_mb": comparison.candidate_peak_rss_mb, - "peak_rss_delta_mb": comparison.peak_rss_delta_mb, - "peak_rss_delta_percent": comparison.peak_rss_delta_percent, - "baseline_external_peak_rss_mb": comparison.baseline_external_peak_rss_mb, - "candidate_external_peak_rss_mb": comparison.candidate_external_peak_rss_mb, - "external_peak_rss_delta_mb": comparison.external_peak_rss_delta_mb, - "external_peak_rss_delta_percent": comparison.external_peak_rss_delta_percent, - "baseline_elapsed_seconds": comparison.baseline_elapsed_seconds, - "candidate_elapsed_seconds": comparison.candidate_elapsed_seconds, - "elapsed_delta_seconds": comparison.elapsed_delta_seconds, - "elapsed_delta_percent": comparison.elapsed_delta_percent, - } - - -def result_to_csv_row( - result: CommandResult, artifact_fields: list[str], workload_fields: list[str] -) -> dict[str, object]: - memory = result.memory - row: dict[str, object] = { - "variant": result.variant, - "iteration": 
result.iteration, - "case": result.case.name, - "return_code": result.return_code, - "elapsed_seconds": f"{result.elapsed_seconds:.3f}", - "peak_rss_mb": format_optional_float(result_peak_rss_mb(result)), - "sampled_peak_rss_mb": format_optional_float( - memory.sampled_peak_rss_mb if memory is not None else None - ), - "external_peak_rss_mb": format_optional_float(result_external_peak_rss_mb(result)), - "resource_sample_count": memory.resource_sample_count if memory is not None else 0, - "external_sample_count": memory.external_sample_count if memory is not None else 0, - "max_num_fds": format_optional_int(memory.max_num_fds if memory is not None else None), - "max_num_threads": format_optional_int( - memory.max_num_threads if memory is not None else None - ), - "max_process_cpu_percent": format_optional_float( - memory.max_process_cpu_percent if memory is not None else None - ), - "jaeger_trace_count": len(result.jaeger_traces), - "jaeger_trace_found_count": sum( - 1 for trace in result.jaeger_traces if trace.get("jaeger_found") is True - ), - "jaeger_trace_error_count": sum( - 1 for trace in result.jaeger_traces if trace.get("jaeger_found") is not True - ), - "slowest_graphql_trace_ms": format_optional_float(slowest_graphql_trace_ms(result)), - "slowest_graphql_trace_id": slowest_graphql_trace_id(result) or "", - } - for field_name in artifact_fields: - row[f"artifact_{field_name}"] = result.artifact_sizes.get(field_name, "") - for field_name in workload_fields: - row[f"workload_{field_name}"] = result.workload.get(field_name, "") - return row - - -def normalized_memory(result: CommandResult) -> dict[str, float]: - peak_rss_mb = result_peak_rss_mb(result) - if peak_rss_mb is None: - return {} - normalized: dict[str, float] = {} - for field_name in ( - "memory_model_user_count", - "memory_model_repo_count", - "memory_model_grant_count", - ): - value = result.workload.get(field_name) - if isinstance(value, int | float) and value > 0: - 
normalized[f"peak_rss_mb_per_{field_name}"] = peak_rss_mb / float(value) - return normalized - - -def slowest_graphql_trace_ms(result: CommandResult) -> float | None: - if not result.jaeger_traces: - return None - duration = result.jaeger_traces[0].get("duration_ms") - return float(duration) if isinstance(duration, int | float) else None - - -def slowest_graphql_trace_id(result: CommandResult) -> str | None: - if not result.jaeger_traces: - return None - trace_id = result.jaeger_traces[0].get("trace_id") - return trace_id if isinstance(trace_id, str) else None - - -def result_peak_rss_mb(result: CommandResult) -> float | None: - if result.memory is None: - return None - return result.memory.peak_rss_mb - - -def result_external_peak_rss_mb(result: CommandResult) -> float | None: - if result.memory is None: - return None - return result.memory.external_peak_rss_mb - - -def median_optional(values: Iterable[object]) -> float | None: - numbers = [float(value) for value in values if isinstance(value, int | float)] - if not numbers: - return None - return float(statistics.median(numbers)) - - -def delta(new: float | None, old: float | None) -> float | None: - if new is None or old is None: - return None - return new - old - - -def percent_delta(new: float | None, old: float | None) -> float | None: - if new is None or old is None or old == 0: - return None - return (new - old) / old * 100.0 - - -def format_optional_float(value: float | None) -> str: - if value is None: - return "" - return f"{value:.2f}" - - -def format_signed_optional_float(value: float | None) -> str: - if value is None: - return "" - return f"{value:+.2f}" - - -def format_optional_int(value: int | None) -> str: - if value is None: - return "" - return str(value) - - -def snapshot_path(result: CommandResult) -> Path: - if result.run_directory is None: - raise CommandPermutationFailure(f"{result.case.name} did not produce a run directory") - path = result.run_directory / "before.json" - if not 
path.is_file(): - raise CommandPermutationFailure(f"{result.case.name} did not write {path}") - return path - - -def repositories_for_user(path: Path, username: str) -> set[str]: - snapshot = json.loads(path.read_text()) - repositories: set[str] = set() - for repository in snapshot.get("repos", {}).values(): - explicit_users = repository.get("explicit_permissions_users", []) - if username in explicit_users: - repositories.add(repository["name"]) - return repositories - - -if __name__ == "__main__": - main() diff --git a/tests/README.md b/tests/README.md index b2b43f1..ec542f9 100644 --- a/tests/README.md +++ b/tests/README.md @@ -86,6 +86,34 @@ of at most 2 (`SITE_USERS_PAGE_CAP` in `e2e/case_runner.py`), so a fixture with 4 users already spans 2 pages — that is what catches selection truncation bugs locally in milliseconds. +## Instance state: setup.py / setup.yaml + +[setup.py](./setup.py) converges the test instance to the desired state in +[setup.yaml](./setup.yaml) — run it BEFORE `run.py --live`: + +```bash +uv run tests/setup.py # report drift, change nothing +uv run tests/setup.py --apply # converge the instance +``` + +It verifies site config, synthetic user/repo counts, rewrites any legacy +real-looking addresses to `{username}@perms-sync.test`, fabricates SAML +external accounts (group claims for `samlGroups` live cases, written via +SQL on the pgsql pod and verified back through the product's own GraphQL +parser), deletes orphaned explicit grants on deleted repos, and clears +pending permissions. GraphQL is used for instance-level reads; bulk state +goes through `kubectl exec` + psql because it is orders of magnitude +faster. Everything it touches is synthetic (`test_user_*`); it never +creates or deletes users itself. 
+ +Live cases declare their identity preconditions in tests.yaml: +`live.requiredSamlGroups` (preflight: fabricated accounts must match, with +a pointer to setup.py on drift) and `live.temporaryUsers` (the harness +creates the named users fresh via `createUser` — `created_at` = now — and +hard-deletes them afterwards; `{today}` in a cliCommand resolves to the +run's UTC date, which makes positive `--created-after` selection +deterministic against the long-pre-existing synthetic users). + ## PyPI install smoke (`--install`) `uv run tests/run.py --install` pip-installs the **published** package into a diff --git a/tests/e2e/fixtures/add-users-by-email-and-list/after.json b/tests/e2e/fixtures/add-users-by-email-and-list/after.json index 472e339..51c56b0 100644 --- a/tests/e2e/fixtures/add-users-by-email-and-list/after.json +++ b/tests/e2e/fixtures/add-users-by-email-and-list/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/add-users-by-email-and-list/before.json b/tests/e2e/fixtures/add-users-by-email-and-list/before.json index 44fb50f..bb8679d 100644 --- a/tests/e2e/fixtures/add-users-by-email-and-list/before.json +++ b/tests/e2e/fixtures/add-users-by-email-and-list/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": 
"test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/add-users-preserves-existing/after.json b/tests/e2e/fixtures/add-users-preserves-existing/after.json index 14cb19c..ea3cead 100644 --- a/tests/e2e/fixtures/add-users-preserves-existing/after.json +++ b/tests/e2e/fixtures/add-users-preserves-existing/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/add-users-preserves-existing/before.json b/tests/e2e/fixtures/add-users-preserves-existing/before.json index 0477656..bf361e1 100644 --- a/tests/e2e/fixtures/add-users-preserves-existing/before.json +++ b/tests/e2e/fixtures/add-users-preserves-existing/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": 
"marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/and-filters-intersect/after.json b/tests/e2e/fixtures/and-filters-intersect/after.json index c7e6f0b..be8483c 100644 --- a/tests/e2e/fixtures/and-filters-intersect/after.json +++ b/tests/e2e/fixtures/and-filters-intersect/after.json @@ -35,7 +35,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -65,7 +65,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/and-filters-intersect/before.json b/tests/e2e/fixtures/and-filters-intersect/before.json index 6ef40cd..4363606 100644 --- a/tests/e2e/fixtures/and-filters-intersect/before.json +++ b/tests/e2e/fixtures/and-filters-intersect/before.json @@ -35,7 +35,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -65,7 +65,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/and-filters-intersect/maps.yaml b/tests/e2e/fixtures/and-filters-intersect/maps.yaml index 718283e..057a979 100644 --- a/tests/e2e/fixtures/and-filters-intersect/maps.yaml +++ b/tests/e2e/fixtures/and-filters-intersect/maps.yaml @@ -5,7 +5,7 @@ 
maps: configID: okta samlGroup: engineering emails: - - marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com + - test_user_09991@perms-sync.test repos: names: - test-repo-49981 diff --git a/tests/e2e/fixtures/empty-maps-noop/before.json b/tests/e2e/fixtures/empty-maps-noop/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/empty-maps-noop/before.json +++ b/tests/e2e/fixtures/empty-maps-noop/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/full-overwrite-dry-run/before.json b/tests/e2e/fixtures/full-overwrite-dry-run/before.json index d929f39..079efd5 100644 --- a/tests/e2e/fixtures/full-overwrite-dry-run/before.json +++ b/tests/e2e/fixtures/full-overwrite-dry-run/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09994-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09994@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json index 5485d5b..b649d67 100644 --- a/tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json +++ b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": 
"marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json index 49449d7..8cccf0b 100644 --- a/tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json +++ b/tests/e2e/fixtures/full-overwrite-removes-stale-grant/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/full-overwrite-unions/after.json b/tests/e2e/fixtures/full-overwrite-unions/after.json index 6f86372..d224cc3 100644 --- a/tests/e2e/fixtures/full-overwrite-unions/after.json +++ b/tests/e2e/fixtures/full-overwrite-unions/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 
@@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09994-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09994@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/full-overwrite-unions/before.json b/tests/e2e/fixtures/full-overwrite-unions/before.json index d929f39..079efd5 100644 --- a/tests/e2e/fixtures/full-overwrite-unions/before.json +++ b/tests/e2e/fixtures/full-overwrite-unions/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09994-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09994@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/full-overwrite-with-backup/after.json b/tests/e2e/fixtures/full-overwrite-with-backup/after.json index 5485d5b..b649d67 100644 --- a/tests/e2e/fixtures/full-overwrite-with-backup/after.json +++ b/tests/e2e/fixtures/full-overwrite-with-backup/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", 
"verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/full-overwrite-with-backup/before.json b/tests/e2e/fixtures/full-overwrite-with-backup/before.json index 49449d7..8cccf0b 100644 --- a/tests/e2e/fixtures/full-overwrite-with-backup/before.json +++ b/tests/e2e/fixtures/full-overwrite-with-backup/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/get-full-snapshot/before.json b/tests/e2e/fixtures/get-full-snapshot/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/get-full-snapshot/before.json +++ b/tests/e2e/fixtures/get-full-snapshot/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/get-repos-without-explicit-perms/before.json b/tests/e2e/fixtures/get-repos-without-explicit-perms/before.json index da46d6d..688588a 100644 --- a/tests/e2e/fixtures/get-repos-without-explicit-perms/before.json +++ b/tests/e2e/fixtures/get-repos-without-explicit-perms/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ 
{ - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/get-user-grants/before.json b/tests/e2e/fixtures/get-user-grants/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/get-user-grants/before.json +++ b/tests/e2e/fixtures/get-user-grants/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-bad-regex/before.json b/tests/e2e/fixtures/invalid-bad-regex/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/invalid-bad-regex/before.json +++ b/tests/e2e/fixtures/invalid-bad-regex/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-missing-repos-section/before.json b/tests/e2e/fixtures/invalid-missing-repos-section/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/invalid-missing-repos-section/before.json +++ b/tests/e2e/fixtures/invalid-missing-repos-section/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json b/tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json +++ b/tests/e2e/fixtures/invalid-restore-wrong-schema-version/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": 
"marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-set-created-after-date/before.json b/tests/e2e/fixtures/invalid-set-created-after-date/before.json index 568cbbe..7806467 100644 --- a/tests/e2e/fixtures/invalid-set-created-after-date/before.json +++ b/tests/e2e/fixtures/invalid-set-created-after-date/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json b/tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json index 568cbbe..7806467 100644 --- a/tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json +++ b/tests/e2e/fixtures/invalid-set-repos-created-after-date/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-set-unknown-repo/before.json b/tests/e2e/fixtures/invalid-set-unknown-repo/before.json index 568cbbe..7806467 100644 --- a/tests/e2e/fixtures/invalid-set-unknown-repo/before.json +++ b/tests/e2e/fixtures/invalid-set-unknown-repo/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-set-unknown-user/before.json b/tests/e2e/fixtures/invalid-set-unknown-user/before.json index 568cbbe..7806467 100644 --- a/tests/e2e/fixtures/invalid-set-unknown-user/before.json +++ b/tests/e2e/fixtures/invalid-set-unknown-user/before.json @@ -27,7 +27,7 @@ "createdAt": 
"2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/invalid-unknown-selector-field/before.json b/tests/e2e/fixtures/invalid-unknown-selector-field/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/invalid-unknown-selector-field/before.json +++ b/tests/e2e/fixtures/invalid-unknown-selector-field/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/match-provider-and-host-fields/after.json b/tests/e2e/fixtures/match-provider-and-host-fields/after.json index edf6cb3..459996b 100644 --- a/tests/e2e/fixtures/match-provider-and-host-fields/after.json +++ b/tests/e2e/fixtures/match-provider-and-host-fields/after.json @@ -42,7 +42,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -75,7 +75,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -105,7 +105,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/match-provider-and-host-fields/before.json b/tests/e2e/fixtures/match-provider-and-host-fields/before.json index 4fd2193..2e3bb94 100644 --- a/tests/e2e/fixtures/match-provider-and-host-fields/before.json +++ b/tests/e2e/fixtures/match-provider-and-host-fields/before.json @@ -42,7 +42,7 @@ "createdAt": "2026-01-01T00:00:00Z", 
"emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -75,7 +75,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -105,7 +105,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/no-match-noop/before.json b/tests/e2e/fixtures/no-match-noop/before.json index 41e4e90..6314eeb 100644 --- a/tests/e2e/fixtures/no-match-noop/before.json +++ b/tests/e2e/fixtures/no-match-noop/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/regex-filters-scope/after.json b/tests/e2e/fixtures/regex-filters-scope/after.json index 6f704d7..468c88f 100644 --- a/tests/e2e/fixtures/regex-filters-scope/after.json +++ b/tests/e2e/fixtures/regex-filters-scope/after.json @@ -34,7 +34,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -47,7 +47,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -60,7 +60,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/regex-filters-scope/before.json 
b/tests/e2e/fixtures/regex-filters-scope/before.json index 6785f7a..225b703 100644 --- a/tests/e2e/fixtures/regex-filters-scope/before.json +++ b/tests/e2e/fixtures/regex-filters-scope/before.json @@ -34,7 +34,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -47,7 +47,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -60,7 +60,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/regex-filters-scope/maps.yaml b/tests/e2e/fixtures/regex-filters-scope/maps.yaml index 2870d2a..d68f641 100644 --- a/tests/e2e/fixtures/regex-filters-scope/maps.yaml +++ b/tests/e2e/fixtures/regex-filters-scope/maps.yaml @@ -2,7 +2,7 @@ maps: - name: Email-regex-matched users get test-repo-49981 and test-repo-49982 users: emailRegexes: - - '\+test0999[12]-' + - '^test_user_0999[12]@perms-sync\.test$' repos: nameRegexes: - '^test-repo-4998[12]$' diff --git a/tests/e2e/fixtures/restore-applies-snapshot/after.json b/tests/e2e/fixtures/restore-applies-snapshot/after.json index 015e930..1d6ad93 100644 --- a/tests/e2e/fixtures/restore-applies-snapshot/after.json +++ b/tests/e2e/fixtures/restore-applies-snapshot/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": 
true } ], diff --git a/tests/e2e/fixtures/restore-applies-snapshot/before.json b/tests/e2e/fixtures/restore-applies-snapshot/before.json index 71dda29..1d30851 100644 --- a/tests/e2e/fixtures/restore-applies-snapshot/before.json +++ b/tests/e2e/fixtures/restore-applies-snapshot/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/restore-dry-run-noop/before.json b/tests/e2e/fixtures/restore-dry-run-noop/before.json index 71dda29..1d30851 100644 --- a/tests/e2e/fixtures/restore-dry-run-noop/before.json +++ b/tests/e2e/fixtures/restore-dry-run-noop/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/restore-missing-file/before.json b/tests/e2e/fixtures/restore-missing-file/before.json index 2612a8e..edf425c 100644 --- a/tests/e2e/fixtures/restore-missing-file/before.json +++ b/tests/e2e/fixtures/restore-missing-file/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/saml-group-filter/after.json b/tests/e2e/fixtures/saml-group-filter/after.json index 
15a6abd..89586bc 100644 --- a/tests/e2e/fixtures/saml-group-filter/after.json +++ b/tests/e2e/fixtures/saml-group-filter/after.json @@ -42,7 +42,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -75,7 +75,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -105,7 +105,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/saml-group-filter/before.json b/tests/e2e/fixtures/saml-group-filter/before.json index 4fd2193..2e3bb94 100644 --- a/tests/e2e/fixtures/saml-group-filter/before.json +++ b/tests/e2e/fixtures/saml-group-filter/before.json @@ -42,7 +42,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -75,7 +75,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -105,7 +105,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/saml-group-live/after.json b/tests/e2e/fixtures/saml-group-live/after.json new file mode 100644 index 0000000..5c48316 --- /dev/null +++ b/tests/e2e/fixtures/saml-group-live/after.json @@ -0,0 +1,181 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + 
"serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "perms-sync-test-eng" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "perms-sync-test-eng" + }, + { + "Value": "perms-sync-test-sales" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09993@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + "Values": { + 
"groups": { + "Values": [ + { + "Value": "perms-sync-test-sales" + } + ] + } + } + } + } + ] + }, + { + "id": 4, + "username": "test_user_09994", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09994@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [] + } + } + } + } + ] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49991", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49992", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991", + "test_user_09992" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 103, + "name": "test-repo-49993", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/saml-group-live/before.json b/tests/e2e/fixtures/saml-group-live/before.json new file mode 100644 index 0000000..a63271b --- /dev/null +++ b/tests/e2e/fixtures/saml-group-live/before.json @@ -0,0 +1,175 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + 
"users": [ + { + "id": 1, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "perms-sync-test-eng" + } + ] + } + } + } + } + ] + }, + { + "id": 2, + "username": "test_user_09992", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "perms-sync-test-eng" + }, + { + "Value": "perms-sync-test-sales" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09993", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09993@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "perms-sync-test-sales" + } + ] + } + } + } + } + ] + }, + { + "id": 4, + "username": "test_user_09994", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09994@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/exk12oznaq73Mq18t698", + "clientID": "https://m.eks.m.ps.sgdev.org/.auth/saml/metadata", + "accountData": { + 
"Values": { + "groups": { + "Values": [] + } + } + } + } + ] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49991", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 102, + "name": "test-repo-49992", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 103, + "name": "test-repo-49993", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/saml-group-live/maps.yaml b/tests/e2e/fixtures/saml-group-live/maps.yaml new file mode 100644 index 0000000..cd4b228 --- /dev/null +++ b/tests/e2e/fixtures/saml-group-live/maps.yaml @@ -0,0 +1,10 @@ +maps: + - name: perms-sync-test-eng members own the SAML test repos + users: + authProvider: + configID: okta + samlGroup: perms-sync-test-eng + repos: + names: + - test-repo-49991 + - test-repo-49992 diff --git a/tests/e2e/fixtures/set-created-after-temp-user/after.json b/tests/e2e/fixtures/set-created-after-temp-user/after.json new file mode 100644 index 0000000..6481ccd --- /dev/null +++ b/tests/e2e/fixtures/set-created-after-temp-user/after.json @@ -0,0 +1,42 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [], + "repos": [ + { + "id": 111, + "name": "test-repo-49911", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "perms_sync_temp_user_01" + ], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 112, + "name": "test-repo-49912", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + } 
+ ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-created-after-temp-user/before.json b/tests/e2e/fixtures/set-created-after-temp-user/before.json new file mode 100644 index 0000000..9f7ccf0 --- /dev/null +++ b/tests/e2e/fixtures/set-created-after-temp-user/before.json @@ -0,0 +1,40 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [], + "repos": [ + { + "id": 111, + "name": "test-repo-49911", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + }, + { + "id": 112, + "name": "test-repo-49912", + "externalServiceID": 1, + "explicitPermissionsUsers": [], + "createdAt": "2026-01-01T00:00:00Z" + } + ], + "pendingBindIDs": [] +} diff --git a/tests/e2e/fixtures/set-created-after-temp-user/maps.yaml b/tests/e2e/fixtures/set-created-after-temp-user/maps.yaml new file mode 100644 index 0000000..1afe14e --- /dev/null +++ b/tests/e2e/fixtures/set-created-after-temp-user/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: The temporary user owns test-repo-49911 + users: + usernames: + - perms_sync_temp_user_01 + repos: + names: + - test-repo-49911 diff --git a/tests/e2e/fixtures/set-repos-created-after-noop/before.json b/tests/e2e/fixtures/set-repos-created-after-noop/before.json index aeb72a8..bae1db6 100644 --- a/tests/e2e/fixtures/set-repos-created-after-noop/before.json +++ b/tests/e2e/fixtures/set-repos-created-after-noop/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09941-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09941@perms-sync.test", "verified": true } ], diff --git 
a/tests/e2e/fixtures/set-repos-created-after/after.json b/tests/e2e/fixtures/set-repos-created-after/after.json index d8add10..d3e643b 100644 --- a/tests/e2e/fixtures/set-repos-created-after/after.json +++ b/tests/e2e/fixtures/set-repos-created-after/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-repos-created-after/before.json b/tests/e2e/fixtures/set-repos-created-after/before.json index 0c5d31b..214c6ec 100644 --- a/tests/e2e/fixtures/set-repos-created-after/before.json +++ b/tests/e2e/fixtures/set-repos-created-after/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-repos-filter/after.json b/tests/e2e/fixtures/set-repos-filter/after.json index b6f2af0..ab54308 100644 --- a/tests/e2e/fixtures/set-repos-filter/after.json +++ b/tests/e2e/fixtures/set-repos-filter/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09971-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09971@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09972-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": 
"test_user_09972@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-repos-filter/before.json b/tests/e2e/fixtures/set-repos-filter/before.json index 5c92ffc..dfa4d02 100644 --- a/tests/e2e/fixtures/set-repos-filter/before.json +++ b/tests/e2e/fixtures/set-repos-filter/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09971-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09971@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09972-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09972@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-repos-without-explicit-perms/after.json b/tests/e2e/fixtures/set-repos-without-explicit-perms/after.json index 2b3a4e7..5bb4d46 100644 --- a/tests/e2e/fixtures/set-repos-without-explicit-perms/after.json +++ b/tests/e2e/fixtures/set-repos-without-explicit-perms/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-repos-without-explicit-perms/before.json b/tests/e2e/fixtures/set-repos-without-explicit-perms/before.json index 29b5a24..3f93c98 100644 --- a/tests/e2e/fixtures/set-repos-without-explicit-perms/before.json +++ b/tests/e2e/fixtures/set-repos-without-explicit-perms/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 
@@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-users-created-after-noop/before.json b/tests/e2e/fixtures/set-users-created-after-noop/before.json index 70f5790..a006764 100644 --- a/tests/e2e/fixtures/set-users-created-after-noop/before.json +++ b/tests/e2e/fixtures/set-users-created-after-noop/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09951-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09951@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-users-created-after/after.json b/tests/e2e/fixtures/set-users-created-after/after.json index 8f4f705..ac85e70 100644 --- a/tests/e2e/fixtures/set-users-created-after/after.json +++ b/tests/e2e/fixtures/set-users-created-after/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-02-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-03-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-users-created-after/before.json b/tests/e2e/fixtures/set-users-created-after/before.json index 3cb9a19..afb1aef 100644 --- a/tests/e2e/fixtures/set-users-created-after/before.json +++ b/tests/e2e/fixtures/set-users-created-after/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": 
"marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09991@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-02-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09992-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09992@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-03-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09993-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09993@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-users-without-explicit-perms/after.json b/tests/e2e/fixtures/set-users-without-explicit-perms/after.json index 069ddc1..24d247f 100644 --- a/tests/e2e/fixtures/set-users-without-explicit-perms/after.json +++ b/tests/e2e/fixtures/set-users-without-explicit-perms/after.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09961-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09961@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09963-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09963@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09964-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09964@perms-sync.test", "verified": true } ], @@ -66,7 +66,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09962-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09962@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/fixtures/set-users-without-explicit-perms/before.json b/tests/e2e/fixtures/set-users-without-explicit-perms/before.json index 60aac3e..15100ad 100644 --- a/tests/e2e/fixtures/set-users-without-explicit-perms/before.json +++ 
b/tests/e2e/fixtures/set-users-without-explicit-perms/before.json @@ -27,7 +27,7 @@ "createdAt": "2026-01-01T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09961-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09961@perms-sync.test", "verified": true } ], @@ -40,7 +40,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09963-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09963@perms-sync.test", "verified": true } ], @@ -53,7 +53,7 @@ "createdAt": "2026-01-03T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09964-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09964@perms-sync.test", "verified": true } ], @@ -66,7 +66,7 @@ "createdAt": "2026-01-02T00:00:00Z", "emails": [ { - "email": "marc.leblanc+test09962-m.eks.m.ps.sgdev.org@sourcegraph.com", + "email": "test_user_09962@perms-sync.test", "verified": true } ], diff --git a/tests/e2e/test_local_cases.py b/tests/e2e/test_local_cases.py index 92486ea..2aa6305 100644 --- a/tests/e2e/test_local_cases.py +++ b/tests/e2e/test_local_cases.py @@ -49,12 +49,16 @@ def test_registry_cases_are_runnable(self) -> None: path = FIXTURES_DIR / case_name / file_name self.assertTrue(path.is_file(), f"case requires {path}") cli_command = case.get("cliCommand", "") - if "{user}" in cli_command: - self.assertNotIn( - "local", - case_modes(case), - "{user} resolves to the live --user; local mode cannot use it", - ) + for placeholder, meaning in ( + ("{user}", "the live --user"), + ("{today}", "the run date (UTC)"), + ): + if placeholder in cli_command: + self.assertNotIn( + "local", + case_modes(case), + f"{placeholder} resolves to {meaning}; local mode cannot use it", + ) argv = shlex.split(cli_command) if argv[:1] == ["restore"] and {"live", "performance"} & set(case_modes(case)): self.assertNotIn( diff --git a/tests/run.py b/tests/run.py index 4872457..2f406c3 100644 --- a/tests/run.py +++ b/tests/run.py @@ -1036,6 +1036,41 @@ def 
read_back_explicit_repo_names(self, username: str) -> set[str] | None: return names after_cursor = cast("str | None", page_info.get("endCursor")) + def read_back_saml_groups(self, username: str) -> list[str] | None: + """Return a user's sorted SAML group claims, or None if user/account missing. + + Reads the same `externalAccounts.accountData` surface the product + parses; used to verify the fabricated accounts from tests/setup.py. + """ + data = self.graphql( + "query TestSamlGroups($username: String!) { user(username: $username) {" + " externalAccounts(first: 50) { nodes { serviceType accountData } } } }", + {"username": username}, + ) + user = cast("dict[str, Any] | None", data.get("user")) + if user is None: + return None + for account in cast("list[dict[str, Any]]", user["externalAccounts"]["nodes"]): + if account["serviceType"] != "saml": + continue + account_data = cast("object", account.get("accountData")) + if isinstance(account_data, str): + account_data = cast("object", json.loads(account_data)) + if not isinstance(account_data, dict): + return [] + attributes = cast("dict[str, Any]", account_data) + groups_attribute = cast( + "dict[str, Any]", + cast("dict[str, Any]", attributes.get("Values") or {}).get("groups") or {}, + ) + group_values = cast("list[object]", groups_attribute.get("Values") or []) + return sorted( + cast(str, cast("dict[str, Any]", value)["Value"]) + for value in group_values + if isinstance(value, dict) and "Value" in cast("dict[str, Any]", value) + ) + return None + def read_back_repository_explicit_users( self, repository_name: str ) -> tuple[int, set[str]] | None: @@ -1157,10 +1192,41 @@ def run_live(self) -> None: return self.record("live prerequisites", "live", True, 0.0) + self.check_live_hygiene() if self.test_selected("wheel install smoke"): self.run_wheel_install_smoke() self.run_live_fixture_cases(environment) self.run_live_permission_cycles(environment) + self.check_live_hygiene() + + def check_live_hygiene(self) -> None: + 
"""Cheap small-state guard: no pending bindIDs should ever persist. + + Deep hygiene (grant-table counts, orphan cleanup, SAML fixtures, + synthetic emails) is `uv run tests/setup.py`'s job before the run. + """ + if not self.test_selected("live hygiene"): + return + started = time.monotonic() + try: + pending = cast( + "list[str]", + self.graphql("query TestPending { usersWithPendingPermissions }", {})[ + "usersWithPendingPermissions" + ], + ) + except Exception as exception: + self.record("live hygiene: pending bindIDs", "live", False, 0.0, str(exception)) + return + self.record( + "live hygiene: pending bindIDs", + "live", + not pending, + time.monotonic() - started, + "none" + if not pending + else f"leftover pending bindIDs: {pending[:5]} — run `uv run tests/setup.py --apply`", + ) def run_wheel_install_smoke(self) -> None: log.info("\n--- Live: wheel build + pip install smoke ---") @@ -1422,13 +1488,41 @@ def run_seeded_fixture_apply( ) return + # Preflight: SAML cases need the fabricated accounts from + # tests/setup.py (setup.yaml samlAccounts). Verify groups through + # the same GraphQL surface the product reads. + required_saml_groups = cast( + "dict[str, list[str]]", live_settings.get("requiredSamlGroups") or {} + ) + for username, expected_groups in required_saml_groups.items(): + actual_groups = self.read_back_saml_groups(username) + if actual_groups != sorted(expected_groups): + self.record( + label, + level, + False, + 0.0, + f"SAML fixture drift for {username}: expected {sorted(expected_groups)}, " + f"found {actual_groups}; run `uv run tests/setup.py --apply`", + ) + return + # Repos in scope but absent from after.json must come back exactly as # seeded — these are the canaries that detect widened selectors. 
expected_after = { name: after_grants.get(name, before_grants.get(name, set())) for name in involved_names } + temporary_usernames = cast("list[str]", live_settings.get("temporaryUsers") or []) + created_temporary_user_ids: dict[str, str] = {} try: + for username in temporary_usernames: + user_id = self.create_temporary_user(username) + if user_id is None: + self.record(label, level, False, 0.0, f"could not create temp user {username}") + return + created_temporary_user_ids[username] = user_id + seeded = self.set_repository_states( f"{label} [seed before-state]", level, @@ -1445,9 +1539,14 @@ def run_seeded_fixture_apply( {name: before_grants.get(name, set()) for name in involved_names}, ) + today = datetime.datetime.now(datetime.UTC).date().isoformat() + main_arguments = tuple( + token.replace("{user}", self.test_user).replace("{today}", today) + for token in case_cli_arguments(cast("Any", case), case_name) + ) main_case = CliCase( label, - tuple(case_cli_arguments(cast("Any", case), case_name)), + main_arguments, 1 if expected_errors else 0, expected_errors, ) @@ -1476,6 +1575,39 @@ def run_seeded_fixture_apply( level, {name: state[1] for name, state in original_state.items()}, ) + for username, user_id in created_temporary_user_ids.items(): + self.delete_temporary_user(label, level, username, user_id) + + def create_temporary_user(self, username: str) -> str | None: + """Create a throwaway user (created_at = now) for created-after cases.""" + try: + data = self.graphql( + "mutation TestCreateUser($username: String!) 
{" + " createUser(username: $username) { user { id } } }", + {"username": username}, + ) + return cast(str, data["createUser"]["user"]["id"]) + except Exception as exception: + log.error("createUser %s failed: %s", username, exception) + return None + + def delete_temporary_user(self, label: str, level: str, username: str, user_id: str) -> None: + """Hard-delete a temp user (also cascades its permission rows).""" + try: + self.graphql( + "mutation TestDeleteUser($user: ID!) {" + " deleteUser(user: $user, hard: true) { alwaysNil } }", + {"user": user_id}, + ) + self.record(f"{label} [temp user removed]", level, True, 0.0, username) + except Exception as exception: + self.record( + f"{label} [temp user removed]", + level, + False, + 0.0, + f"hard-delete of {username} failed: {exception}", + ) def set_repository_states( self, name: str, level: str, target_grants: dict[str, tuple[int, set[str]]] diff --git a/tests/setup.py b/tests/setup.py new file mode 100644 index 0000000..9478b98 --- /dev/null +++ b/tests/setup.py @@ -0,0 +1,416 @@ +"""Converge the test Sourcegraph instance to the state in tests/setup.yaml. + +Run BEFORE tests/run.py. Dry-run by default; --apply mutates. + + uv run tests/setup.py # report drift + uv run tests/setup.py --apply # fix drift + +Reads SRC_ENDPOINT / SRC_ACCESS_TOKEN from .env. GraphQL is used for +instance-level reads (site config, auth providers, SAML verification); +raw SQL via `kubectl exec` against the pgsql pod is used for bulk state +(user/repo counts, email rewrites, fabricated SAML accounts, permission +hygiene) because it is orders of magnitude faster than per-user GraphQL. 
+""" + +from __future__ import annotations + +import argparse +import json +import logging +import re +import subprocess +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, cast + +import src_py_lib as src +import yaml +from dotenv import dotenv_values + +sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src")) + +from src_auth_perms_sync.shared import saml_groups # noqa: E402 +from src_auth_perms_sync.shared import site_config as shared_site_config # noqa: E402 + +logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") +log = logging.getLogger("setup") + +SETUP_CONFIG_PATH = Path(__file__).with_name("setup.yaml") +ENV_PATH = Path(__file__).resolve().parents[1] / ".env" + +SAFE_NAME_PATTERN = re.compile(r"^[A-Za-z0-9._@+-]+$") + +EXTERNAL_ACCOUNTS_QUERY = """ +query SetupSamlAccounts($username: String!) { + user(username: $username) { + externalAccounts(first: 50) { + nodes { serviceType serviceID clientID accountData } + } + } +} +""" + +AUTH_PROVIDERS_QUERY = """ +query SetupAuthProviders { + site { + authProviders { + nodes { serviceType serviceID clientID } + } + } +} +""" + +PENDING_PERMISSIONS_QUERY = "query SetupPending { usersWithPendingPermissions }" + + +@dataclass +class Outcome: + """One named check: in-sync, fixed, or needing attention.""" + + name: str + ok: bool + detail: str + + +@dataclass +class Setup: + config: dict[str, Any] + client: src.SourcegraphClient + apply: bool + outcomes: list[Outcome] = field(default_factory=lambda: list[Outcome]()) + + # -- helpers ------------------------------------------------------------ + + def record(self, name: str, ok: bool, detail: str) -> None: + self.outcomes.append(Outcome(name, ok, detail)) + log.log( + logging.INFO if ok else logging.ERROR, "%s %s — %s", "✓" if ok else "✗", name, detail + ) + + def sql(self, statement: str) -> list[list[str]]: + """Run SQL on the pgsql pod; return rows of pipe-separated fields.""" + 
kubectl_config = self.config["kubectl"] + script = f"SET app.current_tenant = '{int(kubectl_config['tenantID'])}';\n{statement}" + command = [ + "kubectl", + "exec", + "-i", + "-n", + str(kubectl_config["namespace"]), + f"pod/{kubectl_config['pod']}", + "--", + "psql", + "-X", + "-q", + "-At", + "-v", + "ON_ERROR_STOP=1", + "-U", + str(kubectl_config["databaseUser"]), + "-d", + str(kubectl_config["database"]), + ] + completed = subprocess.run( + command, input=script, capture_output=True, text=True, timeout=120 + ) + if completed.returncode != 0: + raise RuntimeError(f"psql failed: {completed.stderr.strip()}") + return [line.split("|") for line in completed.stdout.splitlines() if line] + + def sql_value(self, statement: str) -> str: + rows = self.sql(statement) + return rows[0][0] if rows and rows[0] else "" + + # -- checks ------------------------------------------------------------- + + def check_site_config(self) -> None: + try: + validated = shared_site_config.validate_site_config(self.client) + self.record("site-config", True, f"bindID={validated.bind_id_mode}") + except SystemExit as exception: + self.record("site-config", False, str(exception)) + + def saml_provider(self) -> tuple[str, str] | None: + data = self.client.graphql(AUTH_PROVIDERS_QUERY, follow_pages=False) + providers = cast( + "list[dict[str, str]]", + cast("dict[str, Any]", cast("dict[str, Any]", data)["site"])["authProviders"]["nodes"], + ) + for provider in providers: + if provider["serviceType"] == "saml": + return provider["serviceID"], provider["clientID"] + return None + + def check_users_and_repos(self) -> None: + users_config = self.config["users"] + repos_config = self.config["repos"] + user_count = int( + self.sql_value( + "SELECT count(*) FROM users " + f"WHERE username ~ '{users_config['usernamePattern']}' AND deleted_at IS NULL;" + ) + ) + repo_count = int( + self.sql_value( + "SELECT count(*) FROM repo " + f"WHERE name ~ '{repos_config['namePattern']}' AND deleted_at IS NULL;" + ) 
+ ) + self.record( + "users", + user_count >= int(users_config["count"]), + f"{user_count} live synthetic users (need {users_config['count']}); " + "bulk creation is out of setup's scope — reprovision the instance if short", + ) + self.record( + "repos", + repo_count >= int(repos_config["count"]), + f"{repo_count} live synthetic repos (need {repos_config['count']})", + ) + + def check_emails(self) -> None: + users_config = self.config["users"] + template = str(users_config["emailTemplate"]) + legacy = str(users_config["legacyEmailPattern"]) + stale = int( + self.sql_value( + "SELECT count(*) FROM user_emails ue JOIN users u ON u.id = ue.user_id " + f"WHERE u.username ~ '{users_config['usernamePattern']}' " + "AND u.deleted_at IS NULL AND ue.deleted_at IS NULL " + f"AND ue.email LIKE '{legacy}';" + ) + ) + if stale == 0: + self.record("emails", True, "no legacy addresses on live synthetic users") + return + if not self.apply: + self.record( + "emails", False, f"{stale} legacy address(es) to rewrite (run with --apply)" + ) + return + suffix = template.replace("{username}", "") + if ( + not SAFE_NAME_PATTERN.match(suffix.lstrip("@")) + or template[: len("{username}")] != "{username}" + ): + raise RuntimeError(f"emailTemplate must be '{{username}}@': {template!r}") + updated = self.sql_value( + "WITH updated AS (" + " UPDATE user_emails ue SET email = u.username || '" + suffix + "' " + " FROM users u " + f" WHERE u.id = ue.user_id AND u.username ~ '{users_config['usernamePattern']}' " + " AND u.deleted_at IS NULL AND ue.deleted_at IS NULL " + f" AND ue.email LIKE '{legacy}' RETURNING 1" + ") SELECT count(*) FROM updated;" + ) + self.record("emails", True, f"rewrote {updated} address(es) to {template}") + + def check_saml_accounts(self) -> None: + provider = self.saml_provider() + if provider is None: + self.record("saml-provider", False, "no SAML auth provider on the instance") + return + service_id, client_id = provider + self.record("saml-provider", True, 
f"serviceID={service_id}") + + email_template = str(self.config["users"]["emailTemplate"]) + accounts = cast("dict[str, list[str]]", self.config["samlAccounts"]) + drift: list[str] = [] + for username, groups in accounts.items(): + if not SAFE_NAME_PATTERN.match(username) or not all( + SAFE_NAME_PATTERN.match(group) for group in groups + ): + raise RuntimeError(f"unsafe username/group name for {username!r}") + current = self.fabricated_groups_on_instance(username, service_id, client_id) + if current == list(groups): + continue + drift.append(f"{username}: {current} → {list(groups)}") + if self.apply: + self.upsert_saml_account( + username, + groups, + service_id=service_id, + client_id=client_id, + account_id=email_template.replace("{username}", username), + ) + if not drift: + self.record("saml-accounts", True, f"{len(accounts)} fabricated account(s) in sync") + elif self.apply: + for username in accounts: + expected = list(accounts[username]) + actual = self.fabricated_groups_on_instance(username, service_id, client_id) + if actual != expected: + self.record( + "saml-accounts", False, f"{username}: wrote {expected}, read back {actual}" + ) + return + self.record("saml-accounts", True, f"converged: {'; '.join(drift)}") + else: + self.record("saml-accounts", False, f"drift (run with --apply): {'; '.join(drift)}") + + def fabricated_groups_on_instance( + self, username: str, service_id: str, client_id: str + ) -> list[str] | None: + """Read the user's SAML groups back through the REAL consumer path: + GraphQL accountData parsed by the product's own extract_saml_groups.""" + data = self.client.graphql( + EXTERNAL_ACCOUNTS_QUERY, {"username": username}, follow_pages=False + ) + user = cast("dict[str, Any] | None", cast("dict[str, Any]", data).get("user")) + if user is None: + return None + for account in cast( + "list[dict[str, Any]]", cast("dict[str, Any]", user["externalAccounts"])["nodes"] + ): + if ( + account["serviceType"] == "saml" + and account["serviceID"] == 
service_id + and account["clientID"] == client_id + ): + raw = account.get("accountData") + if isinstance(raw, str): + raw = json.loads(raw) + return saml_groups.extract_saml_groups(cast("dict[str, Any] | None", raw)) + return None + + def upsert_saml_account( + self, + username: str, + groups: list[str], + *, + service_id: str, + client_id: str, + account_id: str, + ) -> None: + account_data = json.dumps( + { + "NameID": account_id, + "Values": { + "groups": { + "Name": "groups", + "Values": [{"Value": group} for group in groups], + }, + "Email": {"Name": "Email", "Values": [{"Value": account_id}]}, + }, + } + ) + self.sql( + "INSERT INTO user_external_accounts " + " (user_id, service_type, service_id, client_id, account_id, " + " account_data, encryption_key_id, kind) " + f"SELECT u.id, 'saml', '{service_id}', '{client_id}', '{account_id}', " + f" '{account_data}', '', 'AUTH' " + f"FROM users u WHERE u.username = '{username}' AND u.deleted_at IS NULL " + "ON CONFLICT (tenant_id, user_id, service_type, service_id, client_id, " + " account_id, kind) WHERE deleted_at IS NULL " + "DO UPDATE SET account_data = EXCLUDED.account_data, updated_at = now();" + ) + + def check_permissions_hygiene(self) -> None: + hygiene = self.config["permissionsHygiene"] + orphaned = int( + self.sql_value( + "SELECT count(*) FROM user_repo_permissions urp " + "JOIN repo r ON r.id = urp.repo_id " + "WHERE urp.source = 'api' AND r.deleted_at IS NOT NULL;" + ) + ) + if orphaned and self.apply: + self.sql( + "DELETE FROM user_repo_permissions urp USING repo r " + "WHERE r.id = urp.repo_id AND urp.source = 'api' " + "AND r.deleted_at IS NOT NULL;" + ) + self.record("orphaned-grants", True, f"deleted {orphaned} grant(s) on deleted repos") + else: + self.record( + "orphaned-grants", + orphaned == 0, + "none" + if orphaned == 0 + else f"{orphaned} grant(s) on deleted repos (--apply deletes)", + ) + + live_grants = int( + self.sql_value( + "SELECT count(*) FROM user_repo_permissions urp " + "JOIN 
repo r ON r.id = urp.repo_id " + "WHERE urp.source = 'api' AND r.deleted_at IS NULL;" + ) + ) + threshold = int(hygiene["maxExplicitGrants"]) + detail = f"{live_grants} explicit grant(s) on live repos (threshold {threshold})" + if live_grants > threshold: + top_rows = self.sql( + "SELECT r.name, count(*) FROM user_repo_permissions urp " + "JOIN repo r ON r.id = urp.repo_id " + "WHERE urp.source = 'api' AND r.deleted_at IS NULL " + "GROUP BY r.name ORDER BY count(*) DESC LIMIT 5;" + ) + top = ", ".join(f"{name}={count}" for name, count in top_rows) + detail += f"; leftovers from an unfinished run? top: {top}" + self.record("live-grants", live_grants <= threshold, detail) + + pending = cast( + "list[str]", + cast("dict[str, Any]", self.client.graphql(PENDING_PERMISSIONS_QUERY))[ + "usersWithPendingPermissions" + ], + ) + if pending and self.apply: + self.sql("DELETE FROM pending_repo_permissions;") + self.record("pending-permissions", True, f"cleared {len(pending)} pending bindID(s)") + else: + self.record( + "pending-permissions", + not pending, + "none" if not pending else f"{len(pending)} pending bindID(s) (--apply clears)", + ) + + def run(self) -> int: + self.check_site_config() + self.check_users_and_repos() + self.check_emails() + self.check_saml_accounts() + self.check_permissions_hygiene() + failed = [outcome for outcome in self.outcomes if not outcome.ok] + log.info( + "Summary: %d ok, %d need attention.%s", + len(self.outcomes) - len(failed), + len(failed), + "" if self.apply or not failed else " Re-run with --apply to converge.", + ) + return 1 if failed else 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--apply", action="store_true", help="converge the instance (default: report only)" + ) + arguments = parser.parse_args() + + environment = {key: value for key, value in dotenv_values(ENV_PATH).items() if value} + endpoint = environment.get("SRC_ENDPOINT") + token = 
environment.get("SRC_ACCESS_TOKEN") + if not endpoint or not token: + log.error("SRC_ENDPOINT / SRC_ACCESS_TOKEN missing from %s", ENV_PATH) + return 1 + + config = cast("dict[str, Any]", yaml.safe_load(SETUP_CONFIG_PATH.read_text())) + client = src.SourcegraphClient(endpoint=endpoint, token=token, http=src.HTTPClient(timeout=60)) + log.info( + "Converging %s to %s (%s)", + endpoint, + SETUP_CONFIG_PATH.name, + "apply" if arguments.apply else "dry-run", + ) + try: + return Setup(config=config, client=client, apply=arguments.apply).run() + finally: + client.http.close() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/tests.yaml b/tests/tests.yaml index 8d5186e..f77c52c 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -335,6 +335,49 @@ cases: cliCommand: set --full --apply --no-backup expectedMutations: 2 + saml-group-live: + description: >- + authProvider samlGroup filter against the FABRICATED SAML accounts + that tests/setup.py provisions (setup.yaml samlAccounts): eng-group + members gain the mapped repos; the sales-only and no-group users are + canaries proving the selector does not widen. The local fixture + mirrors the instance state exactly, so local and live assert the + same behavior. + modes: + - local + - live + live: + requiredSamlGroups: + test_user_09991: + - perms-sync-test-eng + test_user_09992: + - perms-sync-test-eng + - perms-sync-test-sales + test_user_09993: + - perms-sync-test-sales + test_user_09994: [] + involvedRepos: + - test-repo-49993 + cliCommand: set --full --apply --no-backup + expectedMutations: 2 + + set-created-after-temp-user: + description: >- + POSITIVE created-after selection on the real instance: the harness + creates a fresh temporary user (created today), so + --created-after {today} selects exactly that user out of 10k + pre-existing ones; the mapped grant lands and the canary repo stays + empty. The temp user is hard-deleted afterwards. 
+ modes: + - live + live: + temporaryUsers: + - perms_sync_temp_user_01 + involvedRepos: + - test-repo-49912 + cliCommand: set --created-after {today} --apply --no-backup + expectedMutations: 1 + set-users-created-after: description: >- createdAfter mode additively grants mapped repos to users created @@ -428,7 +471,7 @@ cases: - local cliCommand: >- set --users - marc.leblanc+test09991-m.eks.m.ps.sgdev.org@sourcegraph.com,test_user_09993 + test_user_09991@perms-sync.test,test_user_09993 --apply --no-backup expectedMutations: 4 From d4a994b68b22ba8c5094bfdcfe68654781f799b6 Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:05:21 -0600 Subject: [PATCH 8/9] setup.py: email check by template drift; never delete unknown state - Drop legacyEmailPattern (migration done): the email check now compares every live synthetic user against emailTemplate, so any future drift is caught, not just the old marc.leblanc+ pattern - Pending permissions are now REPORT-ONLY everywhere (setup.py and the live hygiene bookend): nothing in this suite creates them, so rows have unknown origin and removal is a human decision. The only state setup ever deletes is explicit grants attached to soft-deleted repos (unreachable rows) Amp-Thread-ID: https://ampcode.com/threads/T-019eaec8-b78c-7386-b977-d93720ad3219 Co-authored-by: Amp --- dev/TODO.md | 7 +++--- tests/README.md | 15 ++++++++----- tests/run.py | 4 +++- tests/setup.py | 57 +++++++++++++++++++++++++----------------------- tests/setup.yaml | 51 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 37 deletions(-) create mode 100644 tests/setup.yaml diff --git a/dev/TODO.md b/dev/TODO.md index d82e06b..8160ef4 100644 --- a/dev/TODO.md +++ b/dev/TODO.md @@ -12,9 +12,10 @@ sync-saml-orgs-apply only covers the single real Okta user, add-only). 
## Decide: pendingBindIDs / usersWithPendingPermissions The CLI cannot create pending permissions (it validates users exist), but -snapshots record `pending_bindIDs` and setup.py clears leftovers. Decide -whether "grant before first login" is a customer need; if not, consider -dropping the snapshot field. See the thread discussion 2026-06-11. +snapshots record `pending_bindIDs`, and setup.py / the live hygiene check +report (never delete) any that appear. Decide whether "grant before first +login" is a customer need; if not, consider dropping the snapshot field. +See the thread discussion 2026-06-11. ## High priority: Remote trigger on demand diff --git a/tests/README.md b/tests/README.md index ec542f9..7485923 100644 --- a/tests/README.md +++ b/tests/README.md @@ -96,12 +96,15 @@ uv run tests/setup.py # report drift, change nothing uv run tests/setup.py --apply # converge the instance ``` -It verifies site config, synthetic user/repo counts, rewrites any legacy -real-looking addresses to `{username}@perms-sync.test`, fabricates SAML -external accounts (group claims for `samlGroups` live cases, written via -SQL on the pgsql pod and verified back through the product's own GraphQL -parser), deletes orphaned explicit grants on deleted repos, and clears -pending permissions. GraphQL is used for instance-level reads; bulk state +It verifies site config, synthetic user/repo counts, rewrites any synthetic +user's email that drifted from `{username}@perms-sync.test`, fabricates +SAML external accounts (group claims for `samlGroups` live cases, written +via SQL on the pgsql pod and verified back through the product's own +GraphQL parser), and deletes orphaned explicit grants attached to +soft-deleted repos (unreachable rows — the only state it ever removes). +Pending permissions and grants on live repos are REPORTED, never deleted: +our suite doesn't create them, so their origin is unknown and removal is +a human decision. 
GraphQL is used for instance-level reads; bulk state goes through `kubectl exec` + psql because it is orders of magnitude faster. Everything it touches is synthetic (`test_user_*`); it never creates or deletes users itself. diff --git a/tests/run.py b/tests/run.py index 2f406c3..2aa3bb3 100644 --- a/tests/run.py +++ b/tests/run.py @@ -1225,7 +1225,9 @@ def check_live_hygiene(self) -> None: time.monotonic() - started, "none" if not pending - else f"leftover pending bindIDs: {pending[:5]} — run `uv run tests/setup.py --apply`", + else f"pending bindIDs of unknown origin: {pending[:5]} — investigate " + "before clearing (an empty setRepositoryPermissionsForUsers on the " + "affected repo removes its pending rows)", ) def run_wheel_install_smoke(self) -> None: diff --git a/tests/setup.py b/tests/setup.py index 9478b98..2e6531a 100644 --- a/tests/setup.py +++ b/tests/setup.py @@ -172,36 +172,35 @@ def check_users_and_repos(self) -> None: def check_emails(self) -> None: users_config = self.config["users"] template = str(users_config["emailTemplate"]) - legacy = str(users_config["legacyEmailPattern"]) + suffix = template.replace("{username}", "") + if ( + not SAFE_NAME_PATTERN.match(suffix.lstrip("@")) + or template[: len("{username}")] != "{username}" + ): + raise RuntimeError(f"emailTemplate must be '{{username}}@': {template!r}") + drift_condition = ( + "u.id = ue.user_id " + f"AND u.username ~ '{users_config['usernamePattern']}' " + "AND u.deleted_at IS NULL AND ue.deleted_at IS NULL " + f"AND ue.email <> u.username || '{suffix}'" + ) stale = int( self.sql_value( - "SELECT count(*) FROM user_emails ue JOIN users u ON u.id = ue.user_id " - f"WHERE u.username ~ '{users_config['usernamePattern']}' " - "AND u.deleted_at IS NULL AND ue.deleted_at IS NULL " - f"AND ue.email LIKE '{legacy}';" + f"SELECT count(*) FROM user_emails ue JOIN users u ON {drift_condition};" ) ) if stale == 0: - self.record("emails", True, "no legacy addresses on live synthetic users") + 
self.record("emails", True, f"all live synthetic users match {template}") return if not self.apply: self.record( - "emails", False, f"{stale} legacy address(es) to rewrite (run with --apply)" + "emails", False, f"{stale} address(es) to rewrite to {template} (run with --apply)" ) return - suffix = template.replace("{username}", "") - if ( - not SAFE_NAME_PATTERN.match(suffix.lstrip("@")) - or template[: len("{username}")] != "{username}" - ): - raise RuntimeError(f"emailTemplate must be '{{username}}@': {template!r}") updated = self.sql_value( "WITH updated AS (" - " UPDATE user_emails ue SET email = u.username || '" + suffix + "' " - " FROM users u " - f" WHERE u.id = ue.user_id AND u.username ~ '{users_config['usernamePattern']}' " - " AND u.deleted_at IS NULL AND ue.deleted_at IS NULL " - f" AND ue.email LIKE '{legacy}' RETURNING 1" + f" UPDATE user_emails ue SET email = u.username || '{suffix}' " + f" FROM users u WHERE {drift_condition} RETURNING 1" ") SELECT count(*) FROM updated;" ) self.record("emails", True, f"rewrote {updated} address(es) to {template}") @@ -352,21 +351,25 @@ def check_permissions_hygiene(self) -> None: detail += f"; leftovers from an unfinished run? top: {top}" self.record("live-grants", live_grants <= threshold, detail) + # Report-only: nothing in this test suite creates pending + # permissions, so any rows here have an UNKNOWN origin — setup must + # not silently destroy them. Investigate, then clear deliberately + # (an empty setRepositoryPermissionsForUsers on the affected repo + # removes its pending rows). 
pending = cast( "list[str]", cast("dict[str, Any]", self.client.graphql(PENDING_PERMISSIONS_QUERY))[ "usersWithPendingPermissions" ], ) - if pending and self.apply: - self.sql("DELETE FROM pending_repo_permissions;") - self.record("pending-permissions", True, f"cleared {len(pending)} pending bindID(s)") - else: - self.record( - "pending-permissions", - not pending, - "none" if not pending else f"{len(pending)} pending bindID(s) (--apply clears)", - ) + self.record( + "pending-permissions", + not pending, + "none" + if not pending + else f"{len(pending)} pending bindID(s) of unknown origin: {pending[:5]} — " + "investigate before clearing (setup never deletes these)", + ) def run(self) -> int: self.check_site_config() diff --git a/tests/setup.yaml b/tests/setup.yaml new file mode 100644 index 0000000..f4b5209 --- /dev/null +++ b/tests/setup.yaml @@ -0,0 +1,51 @@ +# Desired state of the test Sourcegraph instance, applied by tests/setup.py +# BEFORE tests/run.py runs. Everything here is synthetic: the test_user_* +# accounts get no human usage, so setup may freely rewrite them. +# +# uv run tests/setup.py # report drift (no changes) +# uv run tests/setup.py --apply # converge the instance to this file + +kubectl: + namespace: m + pod: pgsql-0 + databaseUser: sg + database: sg + tenantID: 1 + +users: + # Synthetic users are pre-provisioned in bulk; setup verifies the count + # and pattern but does not mass-create them. + usernamePattern: "^test_user_[0-9]{5}$" + count: 10000 + # Synthetic users must not carry real-looking addresses. setup --apply + # rewrites any live synthetic user's email that differs from this. + emailTemplate: "{username}@perms-sync.test" + +repos: + namePattern: "^test-repo-[0-9]{5}$" + count: 50000 + +# Fabricated SAML external accounts (account_data is a minimal gosaml2 +# SAMLValues blob the product parses for group claims). 
service_id and +# client_id are discovered from the instance's live SAML auth provider, so +# the fabricated rows always match what mapping rules resolve against. +# Group names are deliberately synthetic so they can never collide with +# real Okta groups. +samlAccounts: + test_user_09991: + - perms-sync-test-eng + test_user_09992: + - perms-sync-test-eng + - perms-sync-test-sales + test_user_09993: + - perms-sync-test-sales + # SAML account with no test groups: canary for samlGroups selectors. + test_user_09994: [] + +permissionsHygiene: + # Explicit API grants on LIVE repos beyond this count are leftovers from + # crashed or unfinished runs; setup reports them (per repo) and fails. + # Grants attached to soft-deleted repos are pure orphans: --apply deletes + # them. Pending permissions should always be empty between runs: --apply + # clears them. + maxExplicitGrants: 50 From 9cb8405b48af39e9588072ec7fe81fd9c30b1f73 Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:25:01 -0600 Subject: [PATCH 9/9] Run every state case through BOTH the CLI and the Python import API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit case_runners now returns cli+import for every state case with a cliCommand. The import run derives its Config from the same command line: parse argv, diff against a default Config, rebuild via keyword construction + model_copy — the documented library-consumer path. Both entrypoints must produce the exact after.json state, proving CLI/import parity for all 29 state cases (local tier: 68 → 97 checks). Explicit importConfig still overrides the derived one to pin specific kwargs spellings; replay cases stay CLI-only (parser behavior has no import equivalent). 
Amp-Thread-ID: https://ampcode.com/threads/T-019eaec8-b78c-7386-b977-d93720ad3219 Co-authored-by: Amp --- tests/README.md | 11 +++++++---- tests/e2e/case_runner.py | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/tests/README.md b/tests/README.md index 7485923..8c1fd11 100644 --- a/tests/README.md +++ b/tests/README.md @@ -50,10 +50,13 @@ files (e.g. `test_user_09991`, `test-repo-49981`), and exact selectors only ## What each mode does with a case -- **local** — runs the case's `cliCommand` through the real argument parser - (and `importConfig` through the Python import API, when present) against an - in-memory instance built from `before.json`, then asserts the full - resulting state against `after.json`. Replay-style cases +- **local** — runs every state case TWICE against an in-memory instance + built from `before.json`: once with `cliCommand` through the real + argument parser, and once through the Python import API with a Config + derived from the same command line — both must produce the exact + `after.json` state, proving CLI/import parity for every behavior. An + explicit `importConfig` overrides the derived one (to pin specific + kwargs spellings). Replay-style cases (`expectedExitCode`/`expectedOutput`) assert parser behavior instead and need no files. - **live** — FUNCTIONAL tier: fast, scoped checks against the `.env` test diff --git a/tests/e2e/case_runner.py b/tests/e2e/case_runner.py index 9d67158..50bca43 100644 --- a/tests/e2e/case_runner.py +++ b/tests/e2e/case_runner.py @@ -556,11 +556,21 @@ def case_modes(case: FixtureCase) -> list[str]: def case_runners(case: FixtureCase) -> list[str]: - """Return how a case runs in local mode: parsed argv and/or import API.""" + """Return how a case runs in local mode: parsed argv and/or import API. 
+ + Every state case with a cliCommand runs BOTH ways: once through the + real argument parser, and once through the Python import API with a + Config derived from the same command line — proving each behavior for + CLI consumers and library consumers alike. An explicit importConfig + overrides the derived one (for testing specific kwargs spellings). + Replay cases assert parser behavior, which has no import equivalent. + """ + if is_replay_case(case): + return ["cli"] if "cliCommand" in case else [] runners: list[str] = [] if "cliCommand" in case: - runners.append("cli") - if "importConfig" in case: + runners += ["cli", "import"] + elif "importConfig" in case: runners.append("import") return runners @@ -657,6 +667,27 @@ def required_case_files(case: FixtureCase) -> set[str]: return files +def derived_import_input(case: FixtureCase, case_name: str, endpoint: str) -> cli.CliInput: + """Build the import-API equivalent of a case's command line. + + Parses the cliCommand, then reconstructs the Config the way a library + consumer would: keyword construction plus model_copy of the fields + that differ from defaults. Asserting the same expected state through + both entrypoints proves CLI and import parity for every case. 
+ """ + argv = case_cli_arguments(case, case_name) + argv += ["--src-endpoint", endpoint, "--src-access-token", "fixture-token"] + parsed = cli.load_cli(argv) + defaults = cli.Config(src_endpoint=endpoint, src_access_token="fixture-token") + updates = { + name: getattr(parsed.config, name) + for name in type(parsed.config).model_fields + if getattr(parsed.config, name) != getattr(defaults, name) + } + config = defaults.model_copy(update=updates) + return cli.CliInput(command_name=parsed.command_name, config=config) + + def cli_input_for_case( case: FixtureCase, case_name: str, endpoint: str, runner: str ) -> cli.CliInput: @@ -667,7 +698,7 @@ def cli_input_for_case( return cli.load_cli(argv) import_config = case.get("importConfig") if import_config is None: - raise ValueError(f"case {case_name!r} has no importConfig") + return derived_import_input(case, case_name, endpoint) options = dict(import_config) command_name = cast(cli.CommandName, options.pop("command")) updates: dict[str, object] = {