Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions binary/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ def run_binary(args, payload, home, extra_env=None, stdin_close=False):
return _run([str(BUILT_BINARY)] + args, payload, home, extra_env, stdin_close)


def run_go_binary(args, payload, home, extra_env=None, stdin_close=False):
    """Run the Go rewrite of the hook binary (WEB-4809) through ``_run``.

    The binary path is read from the ``UNBOUND_GO_BINARY`` environment
    variable. Go parity is opt-in: when the variable is unset or empty the
    calling test is skipped instead of failing.

    ``args`` is the argv tail appended after the binary path; the remaining
    parameters are forwarded to ``_run`` unchanged.
    """
    binary_path = os.environ.get("UNBOUND_GO_BINARY")
    if not binary_path:
        pytest.skip("UNBOUND_GO_BINARY not set; Go parity is opt-in")
    command = [binary_path] + args
    return _run(command, payload, home, extra_env, stdin_close)


@pytest.fixture
def sandbox_home(tmp_path):
home = tmp_path / "home"
Expand Down
11 changes: 10 additions & 1 deletion binary/tests/test_hook_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import pytest

from conftest import run_binary, run_cli_dev, run_python_path
from conftest import run_binary, run_cli_dev, run_go_binary, run_python_path

S = {"session_id": "test-session", "transcript_path": "/nonexistent/transcript.jsonl"}

Expand Down Expand Up @@ -96,6 +96,15 @@ def test_frozen_binary_matches_python_path(tool, event, sandbox_home):
assert got.returncode == ref.returncode


@pytest.mark.parametrize("tool,event", CASES)
def test_go_binary_matches_python_path(tool, event, sandbox_home):
    """The Go binary must match the python path's stdout and exit code.

    The python reference runs first and the Go binary second, both against
    the same sandbox home. The case is skipped by ``run_go_binary`` when
    ``UNBOUND_GO_BINARY`` is not set.
    """
    event_json = json.dumps(EVENT_PAYLOADS[tool][event])
    expected = run_python_path(tool, event_json, sandbox_home)
    actual = run_go_binary(["hook", tool, event], event_json, sandbox_home)
    assert actual.stdout == expected.stdout
    assert actual.returncode == expected.returncode


@pytest.mark.parametrize("tool", list(EVENT_PAYLOADS))
@pytest.mark.parametrize("junk", ["", "not json at all"])
def test_malformed_stdin_parity(tool, junk, sandbox_home):
Expand Down
2 changes: 2 additions & 0 deletions go/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
dist/
build/
62 changes: 62 additions & 0 deletions go/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# unbound-hook — Go rewrite (WEB-4809)

Phase 1 scaffold of a Go port of the PyInstaller `unbound-hook` binary in
`binary/`. Rationale: a single static Go binary avoids the PyInstaller
onedir bundle that EDR/AV agents flag and slow-scan on managed fleets.

## Contract

The CLI surface mirrors `binary/` exactly — see `binary/README.md` and
`binary/src/unbound_hook/main.py` / `hook_cmd.py`:

- `unbound-hook hook <tool> [<event>]` — tools: claude-code | cursor |
copilot | codex. stdin event JSON → stdout response JSON. **Fail-open:**
unknown tool, bad input, or any dispatcher failure prints `{}` and exits 0;
this process sits between the user and their editor.
- `unbound-hook setup|backfill|clear` — admin commands, NOT fail-open;
currently exit 1 with "not implemented".
- `unbound-hook --version` / `-V` / `version` — prints `unbound-hook <version>`
and never reads stdin (pkg postinstall pre-warm contract, packaging/README.md
"Version contract"). Version is baked via `-ldflags "-X main.Version=..."`.

Phase 1 status: each tool handler is a fail-open stub (reads stdin, prints
`{}`, exits 0). The real per-tool ports come next; sources are named in the
TODO header of each `internal/hooks/*.go` file.

Phase 2 status: the shared core the four python hook modules duplicate is
ported as stdlib-only packages (not yet wired into the stubs). Each package
doc comment names the python lines it mirrors; `claude-code/hooks/unbound.py`
is the canonical reference:

- `internal/pyjson` — python-`json.dumps`-byte-identical encode/decode
(ordered objects, ensure_ascii, repr(float)); required for stdout and
audit-line parity, since Go's encoding/json formats differently
- `internal/config` — ~/.unbound/config.json + UNBOUND_GATEWAY_URL /
UNBOUND_<TOOL>_API_KEY precedence (codex is env-only, quirk kept)
- `internal/httpc` — HTTP via curl subprocess (house rule: corporate-CA /
Zscaler compat; never net/http), exact python argv, fail-open
- `internal/report` — error.log (25-line cap) + rate-limited best-effort
POST to /v1/hooks/errors
- `internal/audit` — agent-audit.log JSONL load/append/save + session-keyed
cleanup (grouping key is per tool, supplied by callers)
- `internal/locks` — mtime-TTL lock files (self-update lock, dispatch
claim-and-steal, staleness probe, touch)
- `internal/transcript` — claude-code Stop-path transcript JSONL parsing
(parse_transcript_file), including its abort-on-exception quirks

## Build & test

```
./build.sh # needs Go 1.22+ and lipo; builds universal2 dist/unbound-hook/unbound-hook
# UNBOUND_HOOK_VERSION=1.2.3 bakes the release version

UNBOUND_GO_BINARY=$PWD/dist/unbound-hook/unbound-hook \
python3 -m pytest ../binary/tests/ -q # opt-in tool×event parity
```

Stdlib only — zero Go dependencies.

The python path in `binary/` remains the golden reference; the parity
harness in `binary/tests/test_hook_cli.py` compares this binary's stdout +
exit code against `python3 <tool>/unbound.py` for every tool × event when
`UNBOUND_GO_BINARY` is set (skipped otherwise, so CI is unchanged).
35 changes: 35 additions & 0 deletions go/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Build the unbound-hook Go universal2 binary (WEB-4809).
# Requires Go 1.22+ and macOS lipo. UNBOUND_BUILD_GO overrides the toolchain.
#
# Environment:
#   UNBOUND_BUILD_GO      go executable to build with (default: go)
#   UNBOUND_HOOK_VERSION  version baked into the binary (default: 0.0.0-dev)
#
# Output: dist/unbound-hook/unbound-hook (arm64 + amd64 merged with lipo).
set -euo pipefail
cd "$(dirname "$0")"

# Print a fatal diagnostic on stderr (stdout stays reserved for build and
# smoke-test output) and abort the build with exit status 1.
die() {
  echo "$*" >&2
  exit 1
}

GO="${UNBOUND_BUILD_GO:-go}"
command -v "$GO" >/dev/null 2>&1 || die "ERROR: go toolchain not found (set UNBOUND_BUILD_GO)"
command -v lipo >/dev/null 2>&1 || die "ERROR: lipo not found (macOS required)"

# packaging/README.md "Version contract": --version must self-identify with
# the release version as a whitespace-delimited token.
VERSION="${UNBOUND_HOOK_VERSION:-0.0.0-dev}"
LDFLAGS="-s -w -X main.Version=${VERSION}"

OUT=dist/unbound-hook
mkdir -p "$OUT"

# Build one static slice per architecture, then merge them into a single
# fat binary and drop the intermediate per-arch files.
CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 "$GO" build -trimpath -ldflags "$LDFLAGS" \
  -o "$OUT/unbound-hook.arm64" ./cmd/unbound-hook
CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 "$GO" build -trimpath -ldflags "$LDFLAGS" \
  -o "$OUT/unbound-hook.amd64" ./cmd/unbound-hook
lipo -create -output "$OUT/unbound-hook" "$OUT/unbound-hook.arm64" "$OUT/unbound-hook.amd64"
rm "$OUT/unbound-hook.arm64" "$OUT/unbound-hook.amd64"

echo "--- verifying universal2 ---"
archs=$(lipo -archs "$OUT/unbound-hook")
case "$archs" in
  *x86_64*arm64*|*arm64*x86_64*) echo "OK: universal2 ($archs)" ;;
  *) die "NOT-UNIVERSAL: $archs" ;;
esac

# Smoke: the version flag must answer, and a hook invocation must run on an
# empty JSON event; set -e aborts the build if either command fails.
echo "--- smoke ---"
"./$OUT/unbound-hook" --version
echo '{}' | "./$OUT/unbound-hook" hook claude-code PreToolUse
77 changes: 77 additions & 0 deletions go/cmd/unbound-hook/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Command unbound-hook is the Go rewrite of the PyInstaller hook binary.
// The python implementation under binary/ is the golden reference; this
// dispatcher mirrors binary/src/unbound_hook/main.py exactly.
//
// Subcommands:
//
// hook <tool> [<event>] stdin/stdout hook dispatch (fail-open, exit 0)
// setup [...] MDM onboarding (not implemented yet)
// backfill [...] historical transcript seeding (not implemented yet)
// clear full deregistration (not implemented yet)
// --version / version print version (pkg postinstall pre-warm contract:
// must exit fast without reading stdin)
package main

import (
"fmt"
"os"

"github.com/websentry-ai/setup/go/internal/hooks"
)

// Version is the version string printed by the version subcommand and by the
// usage banner. It is baked at build time via -ldflags "-X main.Version=...";
// a build that does not pass the flag keeps the "0.0.0-dev" default. It must
// remain a package-level string variable for the linker's -X flag to apply.
var Version = "0.0.0-dev"

// usage returns the help text: a "unbound-hook <Version>" banner followed by
// one synopsis line per subcommand. The returned text already ends with a
// newline, so printing it with Println emits one extra trailing blank line.
func usage() string {
return fmt.Sprintf(`unbound-hook %s

Usage:
unbound-hook hook <tool> [<event>] tools: claude-code|cursor|copilot|codex
unbound-hook setup --api-key <key> [--discovery-key <key>] [options]
unbound-hook backfill (--all | --user <name>) [--dry-run] [options]
unbound-hook clear
unbound-hook --version
`, Version)
}

func run(args []string) int {
if len(args) > 0 && (args[0] == "--version" || args[0] == "-V" || args[0] == "version") {
// Pre-warm contract: print and exit, never touch stdin.
fmt.Printf("unbound-hook %s\n", Version)
return 0
}
if len(args) == 0 {
fmt.Println(usage())
return 2
}
if args[0] == "-h" || args[0] == "--help" || args[0] == "help" {
fmt.Println(usage())
return 0
}

cmd, rest := args[0], args[1:]
switch cmd {
case "hook":
tool, event := "", ""
if len(rest) > 0 {
tool = rest[0]
}
if len(rest) > 1 {
event = rest[1]
}
return hooks.Dispatch(tool, event, os.Stdin, os.Stdout)
case "setup", "backfill", "clear":
// Admin commands are NOT fail-open: a silent no-op here would look
// like a successful install/backfill/deregistration.
fmt.Fprintf(os.Stderr, "unbound-hook %s: not implemented\n", cmd)
return 1
}

fmt.Fprintf(os.Stderr, "Unknown command: %s\n", cmd)
fmt.Fprintln(os.Stderr, usage())
return 2
}

// main hands the arguments after the program name to run and exits the
// process with the code run returns.
func main() {
	exitCode := run(os.Args[1:])
	os.Exit(exitCode)
}
3 changes: 3 additions & 0 deletions go/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module github.com/websentry-ai/setup/go

go 1.22
127 changes: 127 additions & 0 deletions go/internal/audit/audit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Package audit ports the per-tool agent-audit.log JSONL handling:
// load_existing_logs / save_logs / append_to_audit_log
// (claude-code/hooks/unbound.py lines 205-238) and cleanup_old_logs
// (lines 1103-1126; cursor/unbound.py keys on event.conversation_id
// instead of the top-level session_id, so the grouping key is a caller
// parameter here).
//
// Entries are decoded pyjson values so that Save re-renders each line
// byte-identically to python's json.dumps(json.loads(line)). Paths are
// per tool and owned by callers (~/.claude/hooks/agent-audit.log,
// ~/.cursor/hooks/..., ~/.codex/hooks/..., ~/.copilot/hooks/...). There is
// no size-based rotation — only the session-scoped cleanup; files are
// created with the process umask like python's open().
//
// Quirk copied as-is: a non-object JSONL line loads fine in python and
// only blows up later (AttributeError in cleanup, caught by main's blanket
// handler). Here Cleanup's key func decides what such entries map to.
package audit

import (
"bufio"
"bytes"
"os"
"path/filepath"

"github.com/websentry-ai/setup/go/internal/pyjson"
)

// Load returns every JSONL entry of the file at path that pyjson can decode,
// in file order. Lines that are blank after trimming whitespace, and lines
// that fail to decode, are skipped. A file that cannot be opened yields an
// empty (non-nil) slice; a read error part-way through yields the entries
// collected so far (python swallows the exception and returns the partial
// list). A final line without a trailing newline is still processed.
func Load(path string) []any {
	entries := []any{}

	file, openErr := os.Open(path)
	if openErr != nil {
		return entries
	}
	defer file.Close()

	reader := bufio.NewReader(file)
	for done := false; !done; {
		raw, readErr := reader.ReadBytes('\n')
		// ReadBytes hands back whatever it read before the error (including
		// EOF), so handle this chunk first and stop after it.
		done = readErr != nil

		raw = bytes.TrimSpace(raw)
		if len(raw) == 0 {
			continue
		}
		value, decodeErr := pyjson.Loads(raw)
		if decodeErr != nil {
			continue
		}
		entries = append(entries, value)
	}
	return entries
}

// Save replaces the file at path with one python-format JSON line per entry
// of logs, creating the parent directory first if needed. Every error is
// swallowed. The file is truncated before anything is written, so an entry
// that fails to encode (or a failed write) stops the loop and leaves a
// partial file, exactly like a mid-write python exception would.
func Save(path string, logs []any) {
	if mkErr := os.MkdirAll(filepath.Dir(path), 0o755); mkErr != nil {
		return
	}

	out, createErr := os.Create(path)
	if createErr != nil {
		return
	}
	defer out.Close()

	for i := range logs {
		rendered, encErr := pyjson.Dumps(logs[i])
		if encErr != nil {
			break
		}
		if _, writeErr := out.WriteString(rendered + "\n"); writeErr != nil {
			break
		}
	}
}

// Append writes entry as one python-format JSON line at the end of the file
// at path, creating the parent directory and the file when they are missing.
// Every error is swallowed. The entry is encoded before the file is opened,
// so an entry that cannot be encoded never creates an empty log file.
func Append(path string, entry any) {
	if mkErr := os.MkdirAll(filepath.Dir(path), 0o755); mkErr != nil {
		return
	}

	rendered, encErr := pyjson.Dumps(entry)
	if encErr != nil {
		return
	}

	const flags = os.O_CREATE | os.O_APPEND | os.O_WRONLY
	out, openErr := os.OpenFile(path, flags, 0o644)
	if openErr != nil {
		return
	}
	defer out.Close()

	_, _ = out.WriteString(rendered + "\n")
}

// Cleanup rewrites the log at path once it holds more than limit entries;
// at or below limit the file is left untouched.
//
// key maps an entry to its grouping id, with "" meaning "no id".
//
//   - More than one distinct non-empty id: only entries whose id is the one
//     that first appeared latest in the file are kept. Entries with any other
//     id, or with no id, are dropped. The surviving group is not truncated to
//     limit (python keeps the whole last session however large).
//   - At most one distinct id: the newest limit entries are kept.
func Cleanup(path string, limit int, key func(entry any) string) {
	logs := Load(path)
	if len(logs) <= limit {
		return
	}

	// Count distinct non-empty ids; latest ends up as the id whose first
	// appearance is furthest into the file.
	distinct := 0
	latest := ""
	firstSeen := make(map[string]struct{})
	for _, entry := range logs {
		id := key(entry)
		if id == "" {
			continue
		}
		if _, dup := firstSeen[id]; dup {
			continue
		}
		firstSeen[id] = struct{}{}
		latest = id
		distinct++
	}

	if distinct <= 1 {
		// The guard above already established len(logs) > limit.
		Save(path, logs[len(logs)-limit:])
		return
	}

	survivors := []any{}
	for _, entry := range logs {
		if key(entry) == latest {
			survivors = append(survivors, entry)
		}
	}
	Save(path, survivors)
}
Loading