From 079ed4bb985a7a78db010855166447157449bc7e Mon Sep 17 00:00:00 2001 From: Marc LeBlanc <7050295+marcleblanc2@users.noreply.github.com> Date: Fri, 12 Jun 2026 09:48:10 -0600 Subject: [PATCH] Let module callers pass in-memory mapping rules to Set Completes dev/PLAN.md Track A Phase A4: Set(config, mapping_rules=[...]) runs permission sync from parsed rules with no maps YAML file, so the full get -> assemble -> dry-run loop never touches disk under no_files. - In-memory rules go through the same structural validation as file-loaded rules (validate_mapping_rules), at the edge before any network or path resolution - With files enabled, the rules actually used are serialized into the run directory as the maps.yaml audit copy, keeping the audit trail faithful (a stale customer maps file is never copied) - Snapshots still gate apply: no_files + apply keeps requiring no_backup; reversibility invariant unchanged - MappingRule exported from the package for caller typing - Module-mode diagnostics: swallowed SystemExit messages now surface through the package logger so host applications can see why a run failed Tests: rules resolution/validation unit matrix; audit serializer round-trip; e2e parity (file vs in-memory vs in-memory+no_files produce identical mutations and instance state on dry-run and apply fixtures); audit-copy content equality; invalid-rules fail-fast with zero mutations; module-API falsy-result and logger-diagnostic guarantees. Verification: tests/run.py 102/102 fast, 120/120 --live against the sgdev test instance. Amp-Thread-ID: https://ampcode.com/threads/T-019eba67-c19d-7166-9690-dcb2f0eed165 Co-authored-by: Amp --- README.md | 14 +++ dev/PLAN.md | 12 +- dev/TODO.md | 10 -- src/src_auth_perms_sync/__init__.py | 2 + src/src_auth_perms_sync/cli.py | 70 +++++++++-- .../permissions/command.py | 25 +++- .../permissions/full_set.py | 5 +- src/src_auth_perms_sync/permissions/maps.py | 22 ++++ .../permissions/workflow.py | 16 +++ tests/e2e/case_runner.py | 31 ++++- tests/e2e/test_local_cases.py | 84 +++++++++++++ tests/unit/test_cli_config.py | 5 + tests/unit/test_mapping_rules.py | 116 ++++++++++++++++++ tests/unit/test_module_api.py | 57 +++++++++ 14 files changed, 433 insertions(+), 36 deletions(-) create mode 100644 tests/unit/test_mapping_rules.py diff --git a/README.md b/README.md index 914d631..c1eeb88 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,20 @@ for provider in get_result.auth_providers: for code_host in get_result.code_hosts: ... +# Mapping rules can be passed in memory instead of a maps YAML file — +# same structure and validation as maps.yaml entries: +rules: list[src.MappingRule] = [ + { + "name": "Map 1", + "users": {"usernameRegexes": [".*"]}, + "repos": {"codeHostConnection": {"kind": "GITHUB"}}, + }, +] +result = src.Set(config, mapping_rules=rules) +# When files are enabled, the rules actually used are written into the +# run directory for auditability. Snapshots still gate apply=True unless +# no_files=True and no_backup=True are both set explicitly. + # Other command wrappers: # result = src.Restore(config) # result = src.SyncSamlOrgs(config) diff --git a/dev/PLAN.md b/dev/PLAN.md index d7eb5de..3834247 100644 --- a/dev/PLAN.md +++ b/dev/PLAN.md @@ -1,11 +1,11 @@ # PLAN: file handling + observability redesign -> **Status (2026-06-12): implemented**, except Track A Phase A4 -> (in-memory mapping rules for `Set`), which the plan marks optional — -> tracked in [TODO.md](./TODO.md). Shipped as src-py-lib v0.3.0 and the -> src-auth-perms-sync `refactor-logging-and-files` PR. Phases were -> compressed into one PR per repo (no ContextVar bridge phase was -> needed); everything else landed as specified. +> **Status (2026-06-12): fully implemented**, including Track A Phase A4 +> (in-memory mapping rules for `Set`). Shipped as src-py-lib v0.3.0 and +> src-auth-perms-sync v0.5.0 (`refactor-logging-and-files` PR), with A4 +> following in the `in-memory-mapping-rules` PR. Phases were compressed +> into one PR per repo (no ContextVar bridge phase was needed); +> everything landed as specified. Spans both repos (we own both, both greenfield, no external users): diff --git a/dev/TODO.md b/dev/TODO.md index 50f0aa0..b4cb5ea 100644 --- a/dev/TODO.md +++ b/dev/TODO.md @@ -1,15 +1,5 @@ # TODO -## Follow-up: in-memory mapping rules for Set (PLAN.md Track A Phase A4) - -The rest of [PLAN.md](./PLAN.md) is implemented (src-py-lib v0.3.0 + -the consumer refactor-logging-and-files PR). The one deliberately -deferred piece, marked optional in the plan: let module callers pass -parsed mapping rules to `Set` instead of a maps file, so the full -get → assemble → dry-run loop never touches disk. Snapshots must stay -on disk for `--apply` (reversibility invariant); `no_files` + `apply` -must keep requiring `no_backup`. - ## High priority: Remote trigger on demand - Sourcegraph webhook for new user coming in v7.4.0 diff --git a/src/src_auth_perms_sync/__init__.py b/src/src_auth_perms_sync/__init__.py index ba228f8..36c7770 100644 --- a/src/src_auth_perms_sync/__init__.py +++ b/src/src_auth_perms_sync/__init__.py @@ -16,6 +16,7 @@ ) from .cli import CommandResult, Config, Get, GetResult, Restore, Set, SyncSamlOrgs +from .permissions.types import MappingRule from .shared.backups import RunPaths __all__ = [ @@ -28,6 +29,7 @@ "GetResult", "InMemoryEventSink", "JSONLEventSink", + "MappingRule", "NullEventSink", "Restore", "RunPaths", diff --git a/src/src_auth_perms_sync/cli.py b/src/src_auth_perms_sync/cli.py index 82f9595..dd0347a 100644 --- a/src/src_auth_perms_sync/cli.py +++ b/src/src_auth_perms_sync/cli.py @@ -26,6 +26,7 @@ from .orgs import command as organizations_command from .permissions import command as permissions_command +from .permissions import mapping as permissions_mapping from .permissions import maps as permissions_maps from .permissions import types as permission_types from .shared import backups, run_context, site_config @@ -741,6 +742,7 @@ def run_with_client( endpoint: str, run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: """Create a client, run the selected command, and always close HTTP resources.""" http = src.HTTPClient( @@ -756,7 +758,7 @@ def run_with_client( fetch_sg_traces=config.fetch_sg_traces, ) try: - return run_command(config, command, client, run_paths, worker_pool) + return run_command(config, command, client, run_paths, worker_pool, mapping_rules) finally: client.http.close() @@ -767,6 +769,7 @@ def run_command( client: src.SourcegraphClient, run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: """Dispatch the selected command.""" sourcegraph_site_config = site_config.validate_site_config(client) @@ -775,7 +778,13 @@ def run_command( command_data = run_get(config, client, sourcegraph_site_config, run_paths, worker_pool) elif command.name == "set": command_data = run_set( - config, command, client, sourcegraph_site_config, run_paths, worker_pool + config, + command, + client, + sourcegraph_site_config, + run_paths, + worker_pool, + mapping_rules=mapping_rules, ) elif command.name == "restore": run_restore(config, client, sourcegraph_site_config, run_paths, worker_pool) @@ -810,10 +819,22 @@ def run_set( sourcegraph_site_config: site_config.SiteConfig, run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: - """Run the selected repo-permission sync command.""" + """Run the selected repo-permission sync command. + + With in-memory `mapping_rules` (module callers), no maps file is read; + when files are enabled, the rules actually used are written into the run + directory so the audit trail stays faithful. + """ assert command.set_options is not None - require_set_input_file(run_paths.maps_path) + if mapping_rules is None: + require_set_input_file(run_paths.maps_path) + elif run_paths.write_files: + materialized_maps_path = run_paths.input_copy_path(backups.DEFAULT_MAPS_FILE_NAME) + permissions_maps.dump_mapping_rules_yaml(materialized_maps_path, mapping_rules) + run_paths = dataclasses.replace(run_paths, maps_path=materialized_maps_path) + log.info("Wrote in-memory mapping rules for audit: %s", materialized_maps_path) return permissions_command.cmd_set( client, run_paths, @@ -828,6 +849,7 @@ def run_set( do_backup=not config.no_backup, retain_saml_group_users=command.sync_saml_organizations, worker_pool=worker_pool, + mapping_rules=mapping_rules, ) @@ -970,9 +992,22 @@ def Get(config: Config, *, event_sink: src.EventSink | None = None) -> GetResult ) -def Set(config: Config, *, event_sink: src.EventSink | None = None) -> CommandResult: - """Run repository permission reconciliation.""" - succeeded, _, run_paths = _run("set", config, event_sink) +def Set( + config: Config, + *, + mapping_rules: list[permission_types.MappingRule] | None = None, + event_sink: src.EventSink | None = None, +) -> CommandResult: + """Run repository permission reconciliation. + + `mapping_rules` lets module callers pass parsed rules directly (e.g. + assembled from `GetResult` data) instead of a maps YAML file. The rules + go through the same structural validation as file-loaded rules. When + files are enabled, the rules actually used are written into the run + directory for auditability; snapshots still gate `apply` unless + `no_files` and `no_backup` are both set explicitly. + """ + succeeded, _, run_paths = _run("set", config, event_sink, mapping_rules=mapping_rules) return CommandResult(succeeded=succeeded, paths=run_paths) @@ -992,11 +1027,21 @@ def _run( command_name: CommandName, config: Config, event_sink: src.EventSink | None, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> tuple[bool, run_context.CommandData | None, backups.RunPaths | None]: """Run a module-mode command, reporting success instead of raising.""" try: - command_data, run_paths = _run_or_raise(command_name, config, event_sink=event_sink) + command_data, run_paths = _run_or_raise( + command_name, + config, + event_sink=event_sink, + mapping_rules=mapping_rules, + ) except SystemExit as exception: + if isinstance(exception.code, str): + # Module mode swallows the exit; surface the diagnostic through + # the package logger so host applications can see why it failed. + log.error("%s", exception.code) return exception.code in (None, 0), None, None except Exception: log.exception("src-auth-perms-sync run failed.") @@ -1035,6 +1080,7 @@ def _run_or_raise( *, cli_mode: bool = False, event_sink: src.EventSink | None = None, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> tuple[run_context.CommandData, backups.RunPaths]: """Run src-auth-perms-sync, preserving CLI-style exceptions. @@ -1043,6 +1089,10 @@ def _run_or_raise( application's logging configuration stays in charge. """ validate_config(command_name, config) + if mapping_rules is not None: + if command_name != "set": + config_error("mapping_rules can only be passed to Set") + permissions_mapping.validate_mapping_rules(mapping_rules) command = resolve_command(command_name, config) try: endpoint = src.normalize_sourcegraph_endpoint(config.src_endpoint) @@ -1102,7 +1152,9 @@ def _run_or_raise( ) worker_pool = stack.enter_context(run_context.thread_pool(config.parallelism)) try: - command_data = run_with_client(config, command, endpoint, run_paths, worker_pool) + command_data = run_with_client( + config, command, endpoint, run_paths, worker_pool, mapping_rules + ) except SystemExit as exception: reraise_system_exit_with_logged_error(exception) return command_data, run_paths diff --git a/src/src_auth_perms_sync/permissions/command.py b/src/src_auth_perms_sync/permissions/command.py index 5605277..b968a18 100644 --- a/src/src_auth_perms_sync/permissions/command.py +++ b/src/src_auth_perms_sync/permissions/command.py @@ -26,11 +26,11 @@ from .workflow import ( load_discovery, load_mapping_context_discovery, - load_mapping_rules, load_repos_for_mapping_context, load_repository_candidates_by_names, load_repository_candidates_created_on_or_after, parse_cli_date, + resolve_mapping_rules, sourcegraph_datetime_filter, user_ids_created_on_or_after, write_maps_backup, @@ -594,8 +594,13 @@ def cmd_set( do_backup: bool, retain_saml_group_users: bool = False, worker_pool: ThreadPoolExecutor | None = None, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: - """Dispatch the selected set mode.""" + """Dispatch the selected set mode. + + `mapping_rules` carries in-memory rules from module callers; when None, + rules are loaded from `run_paths.maps_path`. + """ options = set_options if options.mode == "full": return permissions_full_set.cmd_set_full( @@ -613,6 +618,7 @@ def cmd_set( do_backup=do_backup, retain_saml_group_users=retain_saml_group_users, worker_pool=worker_pool, + mapping_rules=mapping_rules, ) if options.mode == "repos": assert options.repository_names @@ -631,6 +637,7 @@ def cmd_set( do_backup=do_backup, retain_saml_group_users=retain_saml_group_users, worker_pool=worker_pool, + mapping_rules=mapping_rules, ) if options.mode == "repos_without_explicit_perms": return permissions_full_set.cmd_set_full( @@ -648,6 +655,7 @@ def cmd_set( do_backup=do_backup, retain_saml_group_users=retain_saml_group_users, worker_pool=worker_pool, + mapping_rules=mapping_rules, ) if options.mode == "repos_created_after": assert options.repository_created_after is not None @@ -666,6 +674,7 @@ def cmd_set( do_backup=do_backup, retain_saml_group_users=retain_saml_group_users, worker_pool=worker_pool, + mapping_rules=mapping_rules, ) if options.mode == "users": assert options.user_identifiers @@ -680,6 +689,7 @@ def cmd_set( saml_groups_attribute_name_by_config_id, do_backup, worker_pool, + mapping_rules=mapping_rules, ) if options.mode == "users_without_explicit_perms": return cmd_set_additive_users_without_explicit_perms( @@ -693,6 +703,7 @@ def cmd_set( saml_groups_attribute_name_by_config_id, do_backup, worker_pool, + mapping_rules=mapping_rules, ) if options.mode == "created_after": assert options.user_created_after is not None @@ -706,6 +717,7 @@ def cmd_set( saml_groups_attribute_name_by_config_id, do_backup, worker_pool, + mapping_rules=mapping_rules, ) return run_context.CommandData() @@ -721,6 +733,7 @@ def cmd_set_additive_users( saml_groups_attribute_name_by_config_id: dict[str, str], do_backup: bool, worker_pool: ThreadPoolExecutor | None = None, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: """Add missing mapped permissions for resolved users.""" with src.span( @@ -732,7 +745,7 @@ def cmd_set_additive_users( parallelism=parallelism, do_backup=do_backup, ): - mapping_rules = load_mapping_rules(run_paths.maps_path) + mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) return run_context.CommandData() @@ -844,6 +857,7 @@ def cmd_set_additive_users_without_explicit_perms( saml_groups_attribute_name_by_config_id: dict[str, str], do_backup: bool, worker_pool: ThreadPoolExecutor | None = None, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: """Add mapped permissions for users with no explicit API grants.""" created_after_filter: str | None = None @@ -859,7 +873,7 @@ def cmd_set_additive_users_without_explicit_perms( parallelism=parallelism, do_backup=do_backup, ): - mapping_rules = load_mapping_rules(run_paths.maps_path) + mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) return run_context.CommandData() @@ -1019,6 +1033,7 @@ def cmd_set_additive_created_after( saml_groups_attribute_name_by_config_id: dict[str, str], do_backup: bool, worker_pool: ThreadPoolExecutor | None = None, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: """Add missing mapped permissions for users created on or after a date.""" created_after_filter = sourcegraph_datetime_filter( @@ -1032,7 +1047,7 @@ def cmd_set_additive_created_after( parallelism=parallelism, do_backup=do_backup, ): - mapping_rules = load_mapping_rules(run_paths.maps_path) + mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) return run_context.CommandData() diff --git a/src/src_auth_perms_sync/permissions/full_set.py b/src/src_auth_perms_sync/permissions/full_set.py index 2c69fad..6248fa5 100644 --- a/src/src_auth_perms_sync/permissions/full_set.py +++ b/src/src_auth_perms_sync/permissions/full_set.py @@ -21,12 +21,12 @@ from .workflow import ( load_mapping_context_discovery, load_mapping_context_for_rules, - load_mapping_rules, load_repository_candidates_by_names, load_repository_candidates_created_on_or_after, mapping_context_with_repository_candidates, projected_snapshot_shell, render_projected_snapshot_diff, + resolve_mapping_rules, user_ids_created_on_or_after, validate_post_apply, write_maps_backup, @@ -968,6 +968,7 @@ def cmd_set_full( do_backup: bool, retain_saml_group_users: bool, worker_pool: ThreadPoolExecutor | None = None, + mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: """Overwrite each mapped repo with the union of users from all rules.""" with src.span( @@ -981,7 +982,7 @@ def cmd_set_full( parallelism=parallelism, do_backup=do_backup, ) as command_event: - mapping_rules = load_mapping_rules(run_paths.maps_path) + mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: _finish_empty_full_set_mapping_rules( client, diff --git a/src/src_auth_perms_sync/permissions/maps.py b/src/src_auth_perms_sync/permissions/maps.py index f723d15..fc0a0c3 100644 --- a/src/src_auth_perms_sync/permissions/maps.py +++ b/src/src_auth_perms_sync/permissions/maps.py @@ -269,6 +269,28 @@ def create_maps_yaml_if_missing(path: Path) -> bool: return True +def dump_mapping_rules_yaml( + path: Path, + mapping_rules: list[permission_types.MappingRule], +) -> None: + """Write in-memory mapping rules as a maps YAML file for run auditability.""" + content = yaml.safe_dump( + {"maps": mapping_rules}, + sort_keys=False, + default_flow_style=False, + ) + with src.span( + "disk_io", + level="DEBUG", + op="write", + path=str(path), + file_kind="yaml", + ) as disk_event: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content) + disk_event["bytes"] = len(content) + + def load_maps_yaml(path: Path) -> permission_types.ConfigFile: with src.span( "disk_io", diff --git a/src/src_auth_perms_sync/permissions/workflow.py b/src/src_auth_perms_sync/permissions/workflow.py index 58d17a1..662c70b 100644 --- a/src/src_auth_perms_sync/permissions/workflow.py +++ b/src/src_auth_perms_sync/permissions/workflow.py @@ -106,6 +106,22 @@ def load_mapping_rules(input_path: Path) -> list[permission_types.MappingRule]: return mapping_rules +def resolve_mapping_rules( + provided_rules: list[permission_types.MappingRule] | None, + maps_path: Path, +) -> list[permission_types.MappingRule]: + """Return validated in-memory mapping rules, or load them from the maps file. + + In-memory rules go through the same structural validation as rules read + from YAML, so module callers and CLI operators share one contract. + """ + if provided_rules is None: + return load_mapping_rules(maps_path) + if provided_rules: + permissions_mapping.validate_mapping_rules(provided_rules) + return provided_rules + + def load_mapping_context( client: src.SourcegraphClient, input_path: Path, diff --git a/tests/e2e/case_runner.py b/tests/e2e/case_runner.py index df35a3d..85362d9 100644 --- a/tests/e2e/case_runner.py +++ b/tests/e2e/case_runner.py @@ -28,6 +28,7 @@ from src_py_lib.utils.config import config_options from src_auth_perms_sync import cli +from src_auth_perms_sync.permissions import types as permission_types from src_auth_perms_sync.shared import backups from src_auth_perms_sync.shared import types as shared_types @@ -782,8 +783,20 @@ def cli_input_for_case( def run_fixture_case( - case_name: str, runner: str = "cli", *, no_files: bool = False + case_name: str, + runner: str = "cli", + *, + no_files: bool = False, + mapping_rules: list[permission_types.MappingRule] | None = None, + preserve_artifacts_into: Path | None = None, ) -> FixtureRunResult: + """Run one fixture case; `mapping_rules` exercises the in-memory Set path. + + With `mapping_rules` set, the case's maps file is NOT passed to the run + (maps_path is cleared), proving the rules alone drive the command. + `preserve_artifacts_into` routes run artifacts to a caller-owned + directory (not cleaned up here) so tests can inspect their contents. + """ case = load_e2e_cases()[case_name] case_dir = FIXTURES_DIR / case_name before_state = load_state(case_dir / "before.json") @@ -799,9 +812,16 @@ def run_fixture_case( # Route run artifacts (snapshots, maps copies, generated maps.yaml) into # a per-case temporary directory so local test runs never pollute the # repo's ./src-auth-perms-sync-runs tree; the directory is removed when - # the case finishes. - with tempfile.TemporaryDirectory(prefix="src-auth-perms-sync-case-") as temp_directory: - artifacts_dir = Path(temp_directory) + # the case finishes (unless the caller supplied its own directory). + with contextlib.ExitStack() as artifact_stack: + if preserve_artifacts_into is not None: + artifacts_dir = preserve_artifacts_into + else: + artifacts_dir = Path( + artifact_stack.enter_context( + tempfile.TemporaryDirectory(prefix="src-auth-perms-sync-case-") + ) + ) try: cli_input = cli_input_for_case(case, case_name, client.endpoint, runner) # Local runs execute in-process against the in-memory fake, where @@ -811,6 +831,8 @@ def run_fixture_case( config_updates: dict[str, object] = {"parallelism": 1} if no_files: config_updates["no_files"] = True + if mapping_rules is not None: + config_updates["maps_path"] = None local_config = cli_input.config.model_copy(update=config_updates) command = cli.resolve_command(cli_input.command_name, local_config) run_paths = backups.resolve_run_paths( @@ -827,6 +849,7 @@ def run_fixture_case( cast(src.SourcegraphClient, client), run_paths, worker_pool, + mapping_rules=mapping_rules, ) except SystemExit as exception: command_failure = f"SystemExit: {exception.code!r}" diff --git a/tests/e2e/test_local_cases.py b/tests/e2e/test_local_cases.py index 0ee338d..f899213 100644 --- a/tests/e2e/test_local_cases.py +++ b/tests/e2e/test_local_cases.py @@ -8,9 +8,16 @@ from __future__ import annotations +import tempfile import unittest +from pathlib import Path +from typing import cast + +import yaml from src_auth_perms_sync import cli +from src_auth_perms_sync.permissions import types as permission_types +from src_auth_perms_sync.permissions.workflow import load_mapping_rules from tests.e2e.case_runner import ( FIXTURES_DIR, case_cli_arguments, @@ -93,6 +100,83 @@ def test_local_replay_cases(self) -> None: with self.subTest(case=case_name): self.assertEqual("", run_local_replay_case(case_name)) + def test_in_memory_mapping_rules_match_maps_file(self) -> None: + """In-memory rules must drive set exactly like the same rules from YAML. + + The same fixture case runs through the import API three ways: from + its maps.yaml file, from the equivalent parsed rules in memory (no + maps file passed at all), and from in-memory rules with no_files. + All three must plan identical mutations and end in identical + instance state. The files-enabled in-memory run must write the + rules it actually used into the run directory as the audit copy, + and the no_files run must write nothing. + """ + for case_name in ("full-overwrite-dry-run", "full-overwrite-unions"): + with self.subTest(case=case_name): + mapping_rules = load_mapping_rules(FIXTURES_DIR / case_name / "maps.yaml") + self.assertTrue(mapping_rules, "fixture case must define mapping rules") + + from_file = run_fixture_case(case_name, "import") + from_memory = run_fixture_case(case_name, "import", mapping_rules=mapping_rules) + from_memory_no_files = run_fixture_case( + case_name, + "import", + mapping_rules=mapping_rules, + no_files=True, + ) + + self.assertIsNone(from_file.failure) + self.assertIsNone(from_memory.failure) + self.assertIsNone(from_memory_no_files.failure) + self.assertEqual(from_file.actual_mutations, from_memory.actual_mutations) + self.assertEqual(from_file.actual_state, from_memory.actual_state) + self.assertEqual(from_file.actual_mutations, from_memory_no_files.actual_mutations) + self.assertEqual(from_file.actual_state, from_memory_no_files.actual_state) + + audit_copies = [ + name for name in from_memory.artifact_file_names if name.endswith("maps.yaml") + ] + self.assertTrue( + audit_copies, + "in-memory run with files enabled must write the rules audit copy", + ) + self.assertEqual( + (), + from_memory_no_files.artifact_file_names, + "in-memory run with no_files must write nothing", + ) + + def test_in_memory_mapping_rules_audit_copy_contains_rules_used(self) -> None: + """The audit maps.yaml must contain exactly the in-memory rules used.""" + case_name = "full-overwrite-dry-run" + mapping_rules = load_mapping_rules(FIXTURES_DIR / case_name / "maps.yaml") + + with tempfile.TemporaryDirectory(prefix="rules-audit-") as temp_directory: + audit_directory = Path(temp_directory) + result = run_fixture_case( + case_name, + "import", + mapping_rules=mapping_rules, + preserve_artifacts_into=audit_directory, + ) + self.assertIsNone(result.failure) + audit_copies = sorted(audit_directory.rglob("maps.yaml")) + self.assertEqual(1, len(audit_copies), "expected exactly one rules audit copy") + audit_content = yaml.safe_load(audit_copies[0].read_text()) + self.assertEqual({"maps": mapping_rules}, audit_content) + + def test_in_memory_mapping_rules_reject_invalid_structure(self) -> None: + """Structurally invalid in-memory rules fail fast, before any mutation.""" + case_name = "full-overwrite-unions" + invalid_rules = [{"name": "Broken", "users": {"unknownField": ["x"]}}] + result = run_fixture_case( + case_name, + "import", + mapping_rules=cast("list[permission_types.MappingRule]", invalid_rules), + ) + self.assertIsNotNone(result.failure) + self.assertEqual(0, result.actual_mutations, "invalid rules must mutate nothing") + def test_no_files_set_dry_run_matches_files_enabled(self) -> None: """no_files must not change a set dry-run's decisions, and writes nothing. diff --git a/tests/unit/test_cli_config.py b/tests/unit/test_cli_config.py index 887ff44..b581552 100644 --- a/tests/unit/test_cli_config.py +++ b/tests/unit/test_cli_config.py @@ -560,6 +560,7 @@ def capture_client( client: src.SourcegraphClient, _run_paths: backups.RunPaths, _worker_pool: ThreadPoolExecutor, + _mapping_rules: object = None, ) -> None: captured_clients.append(client) @@ -589,6 +590,7 @@ def capture_client( client: src.SourcegraphClient, _run_paths: backups.RunPaths, _worker_pool: ThreadPoolExecutor, + _mapping_rules: object = None, ) -> None: captured_clients.append(client) @@ -817,6 +819,7 @@ def test_run_command_passes_set_data_to_combined_sync(self) -> None: sourcegraph_site_config, run_paths, worker_pool, + mapping_rules=None, ) run_sync_saml_orgs.assert_called_once_with( configuration, @@ -849,6 +852,7 @@ def test_package_exports_programmatic_runner_and_config(self) -> None: "GetResult", "InMemoryEventSink", "JSONLEventSink", + "MappingRule", "NullEventSink", "Restore", "RunPaths", @@ -868,6 +872,7 @@ def capture_run( endpoint: str, _run_paths: backups.RunPaths, _worker_pool: ThreadPoolExecutor, + _mapping_rules: object = None, ) -> cli.run_context.CommandData: captured.append((scoped_config, command, endpoint)) return cli.run_context.CommandData() diff --git a/tests/unit/test_mapping_rules.py b/tests/unit/test_mapping_rules.py new file mode 100644 index 0000000..06b7087 --- /dev/null +++ b/tests/unit/test_mapping_rules.py @@ -0,0 +1,116 @@ +"""In-memory mapping rules: resolution, validation, and audit serialization. + +Module callers can pass parsed mapping rules to `Set` instead of a maps +YAML file. These tests pin the contract: in-memory rules go through the +same structural validation as file-loaded rules, the maps file is ignored +when rules are provided, and the audit serializer round-trips losslessly. +""" + +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path +from typing import cast + +import yaml + +from src_auth_perms_sync.permissions import maps as permissions_maps +from src_auth_perms_sync.permissions import types as permission_types +from src_auth_perms_sync.permissions.workflow import ( + load_mapping_rules, + resolve_mapping_rules, +) + +VALID_RULES: list[permission_types.MappingRule] = [ + { + "name": "Engineering", + "users": {"usernameRegexes": ["^eng-.*$"]}, + "repos": {"nameRegexes": ["^github\\.example\\.com/eng/.*$"]}, + }, + { + "name": "Auditors get the ledger repos", + "users": {"emails": ["auditor@example.com"]}, + "repos": {"names": ["github.example.com/finance/ledger"]}, + }, +] + + +def write_maps_yaml(path: Path, rules: list[permission_types.MappingRule]) -> None: + path.write_text(yaml.safe_dump({"maps": rules})) + + +class ResolveMappingRulesTests(unittest.TestCase): + def setUp(self) -> None: + self._temporary_directory = tempfile.TemporaryDirectory() + self.maps_path = Path(self._temporary_directory.name) / "maps.yaml" + + def tearDown(self) -> None: + self._temporary_directory.cleanup() + + def test_none_loads_rules_from_the_maps_file(self) -> None: + write_maps_yaml(self.maps_path, VALID_RULES) + self.assertEqual(VALID_RULES, resolve_mapping_rules(None, self.maps_path)) + + def test_provided_rules_are_returned_and_the_maps_file_is_ignored(self) -> None: + decoy_rules: list[permission_types.MappingRule] = [ + { + "name": "Decoy that must not be used", + "users": {"usernameRegexes": [".*"]}, + "repos": {"nameRegexes": [".*"]}, + } + ] + write_maps_yaml(self.maps_path, decoy_rules) + self.assertEqual(VALID_RULES, resolve_mapping_rules(VALID_RULES, self.maps_path)) + + def test_provided_rules_work_without_any_maps_file(self) -> None: + self.assertFalse(self.maps_path.exists()) + self.assertEqual(VALID_RULES, resolve_mapping_rules(VALID_RULES, self.maps_path)) + + def test_provided_empty_rules_return_empty_without_reading_the_file(self) -> None: + self.assertEqual([], resolve_mapping_rules([], self.maps_path)) + + def test_provided_rules_get_the_same_structural_validation_as_yaml(self) -> None: + invalid_rules = [{"name": "Broken", "users": {"unknownField": ["x"]}}] + with self.assertRaises(SystemExit): + resolve_mapping_rules( + cast("list[permission_types.MappingRule]", invalid_rules), + self.maps_path, + ) + + def test_provided_rules_missing_repos_section_are_rejected(self) -> None: + invalid_rules = [{"name": "No repos", "users": {"usernames": ["alice"]}}] + with self.assertRaises(SystemExit): + resolve_mapping_rules( + cast("list[permission_types.MappingRule]", invalid_rules), + self.maps_path, + ) + + def test_provided_non_mapping_entries_are_rejected(self) -> None: + invalid_rules = ["just a string"] + with self.assertRaises(SystemExit): + resolve_mapping_rules( + cast("list[permission_types.MappingRule]", invalid_rules), + self.maps_path, + ) + + +class DumpMappingRulesYamlTests(unittest.TestCase): + def setUp(self) -> None: + self._temporary_directory = tempfile.TemporaryDirectory() + self.output_path = Path(self._temporary_directory.name) / "run" / "maps.yaml" + + def tearDown(self) -> None: + self._temporary_directory.cleanup() + + def test_round_trips_through_the_standard_loader_with_validation(self) -> None: + permissions_maps.dump_mapping_rules_yaml(self.output_path, VALID_RULES) + self.assertEqual(VALID_RULES, load_mapping_rules(self.output_path)) + + def test_writes_the_maps_top_level_key_and_creates_parent_directories(self) -> None: + permissions_maps.dump_mapping_rules_yaml(self.output_path, VALID_RULES) + self.assertEqual({"maps": VALID_RULES}, yaml.safe_load(self.output_path.read_text())) + + def test_empty_rules_serialize_to_an_empty_maps_list(self) -> None: + permissions_maps.dump_mapping_rules_yaml(self.output_path, []) + self.assertEqual({"maps": []}, yaml.safe_load(self.output_path.read_text())) diff --git a/tests/unit/test_module_api.py b/tests/unit/test_module_api.py index 7b380ff..886a117 100644 --- a/tests/unit/test_module_api.py +++ b/tests/unit/test_module_api.py @@ -88,6 +88,63 @@ def test_set_with_no_files_apply_without_no_backup_returns_falsy(self) -> None: # Validation fails before any path resolution, so nothing is created. self.assertEqual([], list(self.working_directory.iterdir())) + def test_set_with_invalid_in_memory_rules_returns_falsy_without_raising(self) -> None: + """Invalid in-memory rules fail validation before any network or disk IO.""" + invalid_rules = [{"name": "Broken", "users": {"unknownField": ["x"]}}] + config = make_config(full=True) + + with self.assertLogs("src_auth_perms_sync", level="ERROR") as captured_logs: + result = src_auth_perms_sync.Set( + config, + mapping_rules=cast( + "list[src_auth_perms_sync.MappingRule]", + invalid_rules, + ), + ) + + self.assertIsInstance(result, cli.CommandResult) + self.assertFalse(result) + # The validation diagnostic reaches host applications through the + # package logger, naming the offending field. + self.assertIn("unknownField", "\n".join(captured_logs.output)) + # Validation fails before path resolution, so nothing is created. + self.assertEqual([], list(self.working_directory.iterdir())) + + def test_set_with_in_memory_rules_needs_no_maps_file(self) -> None: + """With in-memory rules, a missing maps file must not be the failure. + + The unreachable endpoint makes the run fail at site-config + validation, proving it got PAST the maps-file requirement that a + file-based run would fail first (no maps file exists in this cwd). + """ + rules: list[src_auth_perms_sync.MappingRule] = [ + { + "name": "Everyone everywhere", + "users": {"usernameRegexes": [".*"]}, + "repos": {"nameRegexes": [".*"]}, + } + ] + sink = src.InMemoryEventSink() + config = make_config(full=True, no_files=True) + + result = src_auth_perms_sync.Set(config, mapping_rules=rules, event_sink=sink) + + self.assertFalse(result) + self.assertEqual([], list(self.working_directory.iterdir())) + run_end_attributes = [ + self.event_attributes(event) + for event in sink.events + if event.get("event_name") == "run" + and self.event_attributes(event).get("phase") == "end" + ] + self.assertEqual(1, len(run_end_attributes)) + # The failure is the unreachable endpoint (transport), not a missing + # maps file (which would have exited with the set-input message + # before any HTTP attempt was recorded). + self.assertGreaterEqual( + cast("int", run_end_attributes[0].get("http.client.request.count")), 1 + ) + def test_event_sink_receives_run_start_and_error_end_events(self) -> None: sink = src.InMemoryEventSink() config = make_config(no_files=True)