websentry-ai · vigneshsubbiah16 · Jun 13, 2026 · Jun 13, 2026
diff --git a/.github/workflows/static-checks.yml b/.github/workflows/static-checks.yml
@@ -42,3 +42,16 @@ jobs:
             exit 1
           fi
           echo "No undefined names."
+
+      # Fail-open contract gate (WEB-4808). packaging/ holds the hermetic
+      # boundary tests for the scheduled root discovery daemon's credential
+      # channel: the SACRED fail-open path (absent/unreadable/malformed config
+      # must idle-and-exit-0) and the writer side (atomic 0600 file, 0750 dir).
+      # These tests need no built bundle and no upstream package, so they run
+      # here on EVERY PR — the parity suite under binary/tests/ only runs on
+      # the macOS release build, which would let a fail-open regression merge.
+      - name: Install pytest
+        run: pip install pytest==9.0.3
+
+      - name: Run packaging boundary tests (discovery fail-open + writer)
+        run: python -m pytest packaging/ -q
diff --git a/binary/src/unbound_hook/setup_cmd.py b/binary/src/unbound_hook/setup_cmd.py
@@ -516,13 +516,90 @@ def _setup_copilot(opts):
     return ("configured", None)
 
 
+# Root-only credential file the scheduled discovery LaunchDaemon reads to find
+# its api key + domain (WEB-4808). Lives in the pkg-provisioned config dir
+# (postinstall: install -d -m 0750 root:wheel /opt/unbound/etc). The recurring
+# daemon runs as root with no shell env and a world-readable plist that
+# deliberately carries no secrets, so this file is its only credential channel.
+# The entry point (packaging/unbound_discovery_entry.py:DISCOVERY_CONFIG_PATH)
+# reads it as an OPTIONAL fallback; keep the two paths in sync.
+DISCOVERY_CONFIG_PATH = Path("/opt/unbound/etc/discovery.json")
+
+
+def _write_discovery_config(opts):
+    """Persist the discovery key + domain for the scheduled daemon.
+
+    Writes /opt/unbound/etc/discovery.json atomically as 0600 root:wheel (it
+    holds a credential). The `etc` dir itself is created/enforced as 0750
+    root-owned (not the umask-default 0755) so it is not world-traversable.
+    Best-effort: a failure here is reported but never aborts onboarding — the
+    one-shot scan below still runs, and a daemon with no config simply idles
+    fail-open. Returns None on success or a short reason string on failure.
+    """
+    import tempfile
+
+    payload = json.dumps(
+        {"api_key": opts["discovery_key"], "domain": opts["backend_url"]},
+        indent=2,
+    ) + "\n"
+    parent = DISCOVERY_CONFIG_PATH.parent
+    try:
+        # Create ONLY the leaf `etc` dir, not the whole tree: /opt/unbound is
+        # provisioned (and owned) by the pkg postinstall, so its absence means
+        # something is wrong — building the tree here would risk leaving an
+        # ancestor world-readable. Refuse rather than mask that.
+        if not parent.parent.is_dir():
+            return (f"could not persist discovery config: parent "
+                    f"{parent.parent} missing (expected pkg-provisioned)")
+        # 0750 (not the umask default 0755) so the dir is not world-traversable
+        # — it reinforces the directory-level symlink/TOCTOU barrier the 0600
+        # credential file relies on. When root (the production path), enforce
+        # mode + root:root ownership even if the dir already existed; tolerate
+        # non-root/dev contexts where chmod/chown would be denied.
+        os.makedirs(parent, mode=0o750, exist_ok=True)
+        if hasattr(os, "geteuid") and os.geteuid() == 0:
+            try:
+                os.chmod(parent, 0o750)
+                os.chown(parent, 0, 0)
+            except OSError:
+                pass  # best-effort hardening; never block the credential write
+        # Write to a temp file in the same dir (mkstemp is already 0600 by
+        # default), set explicit perms + owner, then atomically rename so a
+        # reader never observes a partial or wrong-permission file.
+        fd, tmp = tempfile.mkstemp(dir=str(parent), prefix=".discovery.", suffix=".json")
+        try:
+            os.fchmod(fd, 0o600)
+            try:
+                os.fchown(fd, 0, 0)  # root:wheel; no-op-equivalent if already root
+            except (PermissionError, OSError):
+                pass  # not root (shouldn't happen — setup requires sudo); keep going
+            with os.fdopen(fd, "w") as f:
+                f.write(payload)
+            os.replace(tmp, str(DISCOVERY_CONFIG_PATH))
+        except Exception:
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+    except Exception as e:
+        return f"could not persist discovery config: {e}"
+    return None
+
+
 def _run_discovery(opts):
     """Run the locally installed discovery binary (no install.sh download).
     Mirrors onboard.py's process-group + backstop-kill discipline."""
     if not opts["discovery_key"]:
         return ("skipped", "no --discovery-key provided")
     if not DISCOVERY_BINARY.is_file():
         return ("deferred", f"discovery binary not installed at {DISCOVERY_BINARY}")
+    # Persist creds so the scheduled LaunchDaemon (which has no shell env and a
+    # secret-free plist) can run the recurring 12h scan, not just this one-shot
+    # (WEB-4808). Fail-open: a persist failure is logged, not fatal.
+    config_err = _write_discovery_config(opts)
+    if config_err:
+        print(f"[setup] discovery: {config_err}", file=sys.stderr)
     # Key via env, never argv — the scan runs up to 90 min and argv is
     # visible to every local user via ps (same contract the hook modules'
     # frozen discovery dispatch uses).

diff --git a/packaging/test-discovery-daemon.sh b/packaging/test-discovery-daemon.sh
@@ -28,10 +28,26 @@ PORT="${SINK_PORT:-18924}"
 TIMEOUT_S="${DAEMON_TEST_TIMEOUT:-900}"
 FAIL=0
 
+# Variant 3 (WEB-4808) writes the real production config path so the frozen
+# binary's own fallback read is exercised. Back up any pre-existing config and
+# restore it on exit so running this on a real install is non-destructive.
+DISCOVERY_CONFIG="/opt/unbound/etc/discovery.json"
+CONFIG_BACKUP="/var/tmp/$LABEL.discovery.json.bak"
+CONFIG_WAS_PRESENT=0
+CONFIG_WE_CREATED=0
+[ -f "$DISCOVERY_CONFIG" ] && { cp -p "$DISCOVERY_CONFIG" "$CONFIG_BACKUP" && CONFIG_WAS_PRESENT=1; }
+
 cleanup() {
     launchctl bootout "system/$LABEL" 2>/dev/null || true
     rm -f "$PLIST"
     rm -rf "$STAGE"
+    # Restore the host's discovery config exactly as we found it.
+    if [ "$CONFIG_WAS_PRESENT" -eq 1 ]; then
+        cp -p "$CONFIG_BACKUP" "$DISCOVERY_CONFIG" 2>/dev/null || true
+        rm -f "$CONFIG_BACKUP"
+    elif [ "$CONFIG_WE_CREATED" -eq 1 ]; then
+        rm -f "$DISCOVERY_CONFIG"
+    fi
     [ -n "${SINK_PID:-}" ] && kill "$SINK_PID" 2>/dev/null || true
 }
 trap cleanup EXIT
@@ -208,7 +224,45 @@ else
     FAIL=1
 fi
 
-echo "daemon logs: $LOG, $NOCONF_LOG; sink artifacts: $SINK_DIR"
+# --- variant 3: production repro (WEB-4808) ----------------------------------
+# The exact production shape: plist carries the `scan` token and NO creds
+# (secret-free, world-readable plist), creds live ONLY in the root-only config
+# file. The daemon must read the file, strip `scan`, scan, and POST — proving
+# the scheduled run no longer idles. This is the case the live plist ships.
+launchctl bootout "system/$LABEL" 2>/dev/null || true
+rm -f /var/tmp/unbound-0/discovery-cache.json /var/root/.unbound/discovery-cache.json
+REPORTS_BEFORE=$(ls "$SINK_DIR"/report_*.json 2>/dev/null | wc -l | tr -d ' ')
+install -d -o root -g wheel -m 0750 /opt/unbound/etc 2>/dev/null || true
+umask 077
+cat > "$DISCOVERY_CONFIG" <<JSONEOF
+{"api_key": "daemon-cfgfile-key", "domain": "http://127.0.0.1:$PORT"}
+JSONEOF
+chown root:wheel "$DISCOVERY_CONFIG" 2>/dev/null || true
+chmod 600 "$DISCOVERY_CONFIG"
+[ "$CONFIG_WAS_PRESENT" -eq 1 ] || CONFIG_WE_CREATED=1
+
+CFG_LOG="/var/tmp/$LABEL.cfgfile.log"
+rm -f "$CFG_LOG"
+# `scan` token in argv + HOME=/var/empty + NO creds in the plist — production.
+write_plist "$CFG_LOG" varempty scan
+launchctl bootstrap system "$PLIST"
+echo "daemon bootstrapped (system/$LABEL, scan token, creds via config file); waiting..."
+wait_for_daemon_exit "$TIMEOUT_S" || { tail -20 "$CFG_LOG" 2>/dev/null; exit 1; }
+
+CFG_REPORTS=$(ls "$SINK_DIR"/report_*.json 2>/dev/null | wc -l | tr -d ' ')
+echo "--- variant 3 results (WEB-4808 production repro) -----------"
+echo "daemon exit code: $DAEMON_EXIT; new report payloads: $((CFG_REPORTS - REPORTS_BEFORE))"
+if [ "$DAEMON_EXIT" = "0" ] && [ "$CFG_REPORTS" -gt "$REPORTS_BEFORE" ] && \
+   ! grep -q "No configuration provided" "$CFG_LOG" 2>/dev/null && \
+   ! grep -q "unrecognized arguments" "$CFG_LOG" 2>/dev/null; then
+    echo "PASS: config-file daemon (scan token, no plist creds) scanned + POSTed"
+else
+    echo "FAIL: config-file daemon — exit $DAEMON_EXIT, reports $CFG_REPORTS, log:"
+    tail -8 "$CFG_LOG" 2>/dev/null
+    FAIL=1
+fi
+
+echo "daemon logs: $LOG, $NOCONF_LOG, $CFG_LOG; sink artifacts: $SINK_DIR"
 if [ "$FAIL" -eq 0 ]; then
     echo "DAEMON TEST PASSED"
 else

diff --git a/packaging/test_discovery_entry.py b/packaging/test_discovery_entry.py
@@ -0,0 +1,177 @@
+"""Boundary tests for the unbound-discovery entry point's config resolution.
+
+Exercises packaging/unbound_discovery_entry.py at its outermost in-process
+seams — `main()` and the config resolver it delegates to — without needing a
+built PyInstaller bundle or the upstream `coding_discovery_tools` package
+(which is fetched only at build time from packaging/discovery.lock).
+
+The behavior under test is WEB-4808: the scheduled root LaunchDaemon runs
+`unbound-discovery scan` with NO --api-key/--domain flags and NO shell env, so
+it must instead pick up credentials from the persisted root-only config file
+/opt/unbound/etc/discovery.json. The two anchors:
+
+  * config file present  -> resolves creds, proceeds to the real sweep
+  * config file absent   -> idles fail-open and exits 0 (unchanged contract)
+
+The upstream sweep is stubbed so we can assert main() reached it (the
+credentialed path) and observe the creds it would run with, while keeping the
+test hermetic.
+"""
+import importlib.util
+import sys
+import types
+from pathlib import Path
+
+import pytest
+
+ENTRY_PATH = Path(__file__).resolve().parent / "unbound_discovery_entry.py"
+
+
+def _load_entry():
+    """Import the entry module fresh from source (it is not a package member)."""
+    spec = importlib.util.spec_from_file_location("unbound_discovery_entry", ENTRY_PATH)
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return mod
+
+
+@pytest.fixture
+def entry(tmp_path, monkeypatch):
+    """Entry module with the config path redirected into a temp dir and a
+    stubbed `coding_discovery_tools.ai_tools_discovery` so the credentialed
+    path is observable without the real upstream package."""
+    mod = _load_entry()
+    cfg_path = tmp_path / "discovery.json"
+    monkeypatch.setattr(mod, "DISCOVERY_CONFIG_PATH", str(cfg_path))
+
+    calls = {"sweep": 0}
+
+    def fake_sweep():
+        calls["sweep"] += 1
+        # Capture what the sweep would actually run with: the entry feeds the
+        # key via UNBOUND_API_KEY env and the domain via --domain argv.
+        import argparse
+        import os
+
+        p = argparse.ArgumentParser(add_help=False)
+        p.add_argument("--domain")
+        ns, _ = p.parse_known_args(sys.argv[1:])
+        calls["api_key"] = os.environ.get("UNBOUND_API_KEY", "")
+        calls["domain"] = ns.domain or ""
+        return 0
+
+    pkg = types.ModuleType("coding_discovery_tools")
+    sub = types.ModuleType("coding_discovery_tools.ai_tools_discovery")
+    sub.main = fake_sweep
+    pkg.ai_tools_discovery = sub
+    monkeypatch.setitem(sys.modules, "coding_discovery_tools", pkg)
+    monkeypatch.setitem(sys.modules, "coding_discovery_tools.ai_tools_discovery", sub)
+
+    # Default to a clean env so flags/env never leak across cases.
+    monkeypatch.delenv("UNBOUND_API_KEY", raising=False)
+
+    mod._test_cfg_path = cfg_path
+    mod._test_calls = calls
+    return mod
+
+
+def _run(mod, argv, monkeypatch):
+    monkeypatch.setattr(sys, "argv", ["unbound-discovery", *argv])
+    return mod.main()
+
+
+# --- config file present: the daemon's `scan` invocation now proceeds ---------
+
+def test_config_file_supplies_creds_for_daemon_scan(entry, monkeypatch):
+    """The production case: `scan` with no flags/env, creds only in the file.
+    main() must strip `scan`, resolve creds from the file, and run the sweep."""
+    entry._test_cfg_path.write_text(
+        '{"api_key": "file-key", "domain": "https://backend.example.com"}'
+    )
+    rc = _run(entry, ["scan"], monkeypatch)
+    assert rc == 0
+    assert entry._test_calls["sweep"] == 1, "credentialed sweep did not run"
+    assert entry._test_calls["api_key"] == "file-key"
+    assert entry._test_calls["domain"] == "https://backend.example.com"
+
+
+def test_config_file_resolves_without_scan_token(entry, monkeypatch):
+    """Bare invocation (no `scan`) with file-only creds also proceeds."""
+    entry._test_cfg_path.write_text(
+        '{"api_key": "file-key", "domain": "https://backend.example.com"}'
+    )
+    assert entry._missing_required_config([]) == []
+    rc = _run(entry, [], monkeypatch)
+    assert rc == 0
+    assert entry._test_calls["sweep"] == 1
+
+
+def test_flags_win_over_config_file(entry, monkeypatch):
+    """Explicit flags/env take precedence; the file only fills absent fields."""
+    entry._test_cfg_path.write_text(
+        '{"api_key": "file-key", "domain": "https://file.example.com"}'
+    )
+    rc = _run(
+        entry,
+        ["--api-key", "flag-key", "--domain", "https://flag.example.com"],
+        monkeypatch,
+    )
+    assert rc == 0
+    assert entry._test_calls["api_key"] == "flag-key"
+    assert entry._test_calls["domain"] == "https://flag.example.com"
+
+
+def test_config_file_fills_only_missing_domain(entry, monkeypatch):
+    """Key from env, domain from file -> both resolve, sweep runs."""
+    entry._test_cfg_path.write_text('{"domain": "https://file.example.com"}')
+    monkeypatch.setenv("UNBOUND_API_KEY", "env-key")
+    rc = _run(entry, ["scan"], monkeypatch)
+    assert rc == 0
+    assert entry._test_calls["api_key"] == "env-key"
+    assert entry._test_calls["domain"] == "https://file.example.com"
+
+
+# --- config file absent / broken: fail-open idle must hold (exit 0) ----------
+
+def test_no_config_file_idles_exit_0(entry, monkeypatch, caplog):
+    """No file, no flags, no env: idle fail-open, exit 0, never touch sweep."""
+    assert not entry._test_cfg_path.exists()
+    rc = _run(entry, ["scan"], monkeypatch)
+    assert rc == 0
+    assert entry._test_calls["sweep"] == 0, "sweep ran without any config (should idle)"
+
+
+def test_partial_config_file_idles_when_key_missing(entry, monkeypatch):
+    """Domain-only file, no env key: still missing the key -> idle, exit 0."""
+    entry._test_cfg_path.write_text('{"domain": "https://file.example.com"}')
+    rc = _run(entry, ["scan"], monkeypatch)
+    assert rc == 0
+    assert entry._test_calls["sweep"] == 0
+
+
+@pytest.mark.parametrize(
+    "contents",
+    ["not json at all", "[1, 2, 3]", '{"api_key": 123, "domain": 456}', ""],
+)
+def test_unreadable_or_malformed_config_idles(entry, monkeypatch, contents):
+    """A corrupt/empty/wrong-typed file must not crash; it idles fail-open."""
+    entry._test_cfg_path.write_text(contents)
+    rc = _run(entry, ["scan"], monkeypatch)
+    assert rc == 0
+    assert entry._test_calls["sweep"] == 0
+
+
+def test_load_discovery_config_never_raises_on_missing(entry):
+    """The loader returns {} (not an exception) for a path that does not exist."""
+    assert entry._load_discovery_config("/no/such/path/discovery.json") == {}
+
+
+# --- the `scan` token strip must not disturb other routing -------------------
+
+def test_version_short_circuits_before_config(entry, monkeypatch, capsys):
+    """--version prints and exits 0 even with a config file present (WEB-4802)."""
+    entry._test_cfg_path.write_text('{"api_key": "k", "domain": "d"}')
+    rc = _run(entry, ["--version"], monkeypatch)
+    assert rc == 0
+    assert "unbound-discovery" in capsys.readouterr().out
+    assert entry._test_calls["sweep"] == 0