Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,74 @@ _No unreleased changes._

---

## 26.13 — 2026-05-27

Change detection + alert sinks (P2-02). The agent now diffs the host
inventory pre- and post-cycle and emits `host.discovered` /
`host.vanished` events to a configurable HTTP webhook, syslog, or both.
Operators no longer have to grep logs to notice a new device appeared.

### Added

- **`internal/alerts` package** — `Event` (JSON-tagged for wire reuse),
`EventType` (`host.discovered`, `host.vanished`), `Emitter`
interface, `Multiplexer` that fans events out to N sinks in
parallel, `NoopEmitter` for the alerts-disabled deployment.
- **WebhookSink** — HTTP POST JSON. One retry on transient failures
(network error or 5xx); 4xx is final. Optional `Authorization`
header passed verbatim (so `Bearer …` and `Basic …` both work
without per-scheme code).
- **SyslogSink** — RFC 5424 over UDP/TCP. Hand-rolled because
stdlib `log/syslog` is Unix-only and the agent runs on Windows.
Message body is the same JSON as the webhook payload, so syslog
parsers (rsyslog mmjsonparse, syslog-ng, Splunk) get structured
fields for free.
- **`config.AlertsConfig`** — new optional top-level section:
```json
"alerts": {
"webhook": { "url": "https://hooks.example/x", "auth_header": "Bearer …" },
"syslog": { "addr": "udp://syslog.example:514", "tag": "inventory" }
}
```
Either or both sub-sections may be set; absence of both silently
disables alerting (no change for existing deployments).

### Changed

- **`agent.New` gained an `alerts.Emitter` parameter** (8th positional
arg). Pass `nil` for the noop emitter; the constructor substitutes
one automatically so tests stay terse.
- **`agent.runCycle` snapshots the host inventory** before scanning
and again after, then diffs the two by IP. Discovered hosts get the
fresh enrichment (Hostname/Vendor/DeviceType); vanished hosts get
the last-known pre-cycle row (since the post-cycle one is gone).
Cycle-failed runs intentionally skip the diff to avoid alert spam
on transient DB blips.
- **`mockHostStore.Upsert`** in the agent test suite now mirrors the
sqlite UPSERT (overwrite-on-conflict) so test stores stay parity
with production — same fix landed in the scanner mock during 26.11.

### Tests

- 11 new tests covering: multiplexer fan-out, sibling sinks surviving
a peer-sink error, webhook auth-header round-trip, 5xx retry, 4xx
no-retry, nil-on-empty-URL/addr guards, syslog RFC 5424 format
against a real UDP listener (PRI calculation, JSON MSG, MSGID =
event type), bad-scheme rejection, agent-level diff producing
`host.vanished` on prune and `host.discovered` on a mid-cycle insert.

### Notes

- Sinks deliver in goroutines. Multiplexer.Emit returns immediately;
failures show up in slog warnings, not back at the call site. This
matches operator expectations for alert pipelines and keeps the
scan-cycle hot path off the network.
- Port-level events (`port.appeared` / `port.vanished`) and watchdog
events are deliberately deferred — host-level coverage satisfies the
headline operator ask first.

---

## 26.12 — 2026-05-27

Service / application discovery (P2-01). Turns "port 22 is open" into
Expand Down
30 changes: 29 additions & 1 deletion cmd/internal/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/Ronin48/NetworkInventoryAgent/internal/admin"
"github.com/Ronin48/NetworkInventoryAgent/internal/agent"
"github.com/Ronin48/NetworkInventoryAgent/internal/alerts"
"github.com/Ronin48/NetworkInventoryAgent/internal/config"
"github.com/Ronin48/NetworkInventoryAgent/internal/health"
"github.com/Ronin48/NetworkInventoryAgent/internal/logging"
Expand Down Expand Up @@ -131,7 +132,13 @@ func Run(opts Options) int {
}
slog.Info("health server started", "addr", healthSrv.Addr(), "tls", healthTLS != nil)

a := agent.New(opts.Name, cfg.Scanner, db.Hosts(), db.Ports(), db.Scans(), tracker)
alertSinks := buildAlertSinks(cfg.Alerts)
mux := alerts.NewMultiplexer(alertSinks...)
if len(alertSinks) > 0 {
slog.Info("alert sinks configured", "count", len(alertSinks))
}

a := agent.New(opts.Name, cfg.Scanner, db.Hosts(), db.Ports(), db.Scans(), tracker, mux)

adminSrv, err := admin.NewServer(
cfg.Admin.Addr, opts.Name,
Expand Down Expand Up @@ -185,10 +192,31 @@ func Run(opts Options) int {
if err := traceShutdown(shutdownCtx); err != nil {
slog.Warn("tracer shutdown error", "err", err)
}
mux.Close()
slog.Info("agent stopped", "name", opts.Name)
return 0
}

// buildAlertSinks instantiates the configured event sinks. A sink that
// fails to construct (bad syslog URL, etc.) is logged and skipped; the
// other sinks still ship. Returns nil when nothing is configured, which
// NewMultiplexer handles cleanly.
func buildAlertSinks(cfg config.AlertsConfig) []alerts.Sink {
var sinks []alerts.Sink
if w := alerts.NewWebhookSink(cfg.Webhook.URL, cfg.Webhook.AuthHeader); w != nil {
sinks = append(sinks, w)
}
if cfg.Syslog.Addr != "" {
s, err := alerts.NewSyslogSink(cfg.Syslog.Addr, cfg.Syslog.Tag, cfg.Syslog.Facility)
if err != nil {
slog.Warn("syslog sink disabled", "err", err)
} else if s != nil {
sinks = append(sinks, s)
}
}
return sinks
}

// buildPeerClient assembles the watchdog's health.Client with a
// tracing-instrumented transport. TLS is layered onto the transport before
// otelhttp wraps it, so spans cover the full handshake-through-read path.
Expand Down
85 changes: 84 additions & 1 deletion internal/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ import (
"log/slog"
"time"

"github.com/Ronin48/NetworkInventoryAgent/internal/alerts"
"github.com/Ronin48/NetworkInventoryAgent/internal/config"
"github.com/Ronin48/NetworkInventoryAgent/internal/health"
"github.com/Ronin48/NetworkInventoryAgent/internal/metrics"
"github.com/Ronin48/NetworkInventoryAgent/internal/scanner"
"github.com/Ronin48/NetworkInventoryAgent/internal/store"
"github.com/Ronin48/NetworkInventoryAgent/models"
)

// Agent runs a periodic scan loop and publishes results to a health.Tracker.
Expand All @@ -22,6 +24,7 @@ type Agent struct {
hosts store.HostStore
scanner *scanner.Scanner
tracker *health.Tracker
alerts alerts.Emitter
now func() time.Time

// trigger is a buffered channel that lets external callers
Expand All @@ -31,15 +34,22 @@ type Agent struct {
}

// New creates an Agent. The caller is responsible for starting the health
// server and watchdog; this constructor only wires the scan loop.
// server and watchdog; this constructor only wires the scan loop. Pass
// alerts.NoopEmitter() when alerts are unconfigured (the constructor
// substitutes one if alertEmitter is nil to avoid an awkward call-site
// guard at every binding).
func New(
name string,
cfg config.ScannerConfig,
hosts store.HostStore,
ports store.PortStore,
scans store.ScanStore,
tracker *health.Tracker,
alertEmitter alerts.Emitter,
) *Agent {
if alertEmitter == nil {
alertEmitter = alerts.NoopEmitter()
}
return &Agent{
name: name,
cfg: cfg,
Expand All @@ -58,6 +68,7 @@ func New(
EnrichARP: cfg.EnrichARP,
}),
tracker: tracker,
alerts: alertEmitter,
now: time.Now,
trigger: make(chan struct{}, 1),
}
Expand Down Expand Up @@ -105,6 +116,15 @@ func (a *Agent) Run(ctx context.Context) {
func (a *Agent) runCycle(ctx context.Context, log *slog.Logger) {
log.Info("scan cycle started", "subnets", len(a.cfg.Subnets))
started := a.now()

// Snapshot the pre-cycle host inventory so we can diff it against
// the post-cycle list and fire HostDiscovered / HostVanished events.
// Snapshotting before the scan (rather than tracking what the
// scanner returned) means the diff correctly reflects "ground truth
// changed", including hosts the operator added or removed
// externally.
prevHosts := snapshotByIP(ctx, a.hosts, log)

cycleHosts := 0
cycleHealthy := true
for _, subnet := range a.cfg.Subnets {
Expand All @@ -125,6 +145,13 @@ func (a *Agent) runCycle(ctx context.Context, log *slog.Logger) {
log.Info("pruned stale hosts", "count", pruned)
}

// Diff and fire events. Only meaningful when the cycle didn't
// itself fail mid-way — declaring hosts "vanished" because of a
// transient DB error would be alert spam.
if cycleHealthy {
a.emitChangeEvents(ctx, log, prevHosts, started)
}

// Use the actual DB count so the tracker reflects total accumulated
// inventory, not just hosts found in this cycle.
total, err := a.hosts.Count(ctx)
Expand Down Expand Up @@ -181,3 +208,59 @@ func (a *Agent) pruneStale(ctx context.Context, log *slog.Logger, now time.Time)
}
return pruned
}

// snapshotByIP lists the current host inventory keyed by IP. Used pre-
// cycle so the change-detection diff has a stable view to compare
// against. A List failure logs and returns nil — the diff will then
// produce no events (better than misleading ones based on a partial set).
func snapshotByIP(ctx context.Context, hs store.HostStore, log *slog.Logger) map[string]*models.Host {
hosts, err := hs.List(ctx)
if err != nil {
log.Warn("change-detect: pre-cycle snapshot failed", "err", err)
return nil
}
out := make(map[string]*models.Host, len(hosts))
for _, h := range hosts {
out[h.IPAddress] = h
}
return out
}

// emitChangeEvents compares the post-cycle host set with the pre-cycle
// snapshot and fires one alert event per change. Discovered events use
// the post-cycle enrichment; vanished events use the pre-cycle row
// (since the post-cycle one is gone).
func (a *Agent) emitChangeEvents(ctx context.Context, log *slog.Logger, prev map[string]*models.Host, cycleStart time.Time) {
curr := snapshotByIP(ctx, a.hosts, log)
if curr == nil {
return
}
for ip, h := range curr {
if _, was := prev[ip]; !was {
a.alerts.Emit(ctx, alerts.Event{
Type: alerts.HostDiscovered,
IP: ip,
Hostname: h.Hostname,
MACAddress: h.MACAddress,
Vendor: h.Vendor,
DeviceType: h.DeviceType,
Time: cycleStart,
Agent: a.name,
})
}
}
for ip, h := range prev {
if _, still := curr[ip]; !still {
a.alerts.Emit(ctx, alerts.Event{
Type: alerts.HostVanished,
IP: ip,
Hostname: h.Hostname,
MACAddress: h.MACAddress,
Vendor: h.Vendor,
DeviceType: h.DeviceType,
Time: cycleStart,
Agent: a.name,
})
}
}
}
Loading
Loading