diff --git a/ChangeLog.md b/ChangeLog.md index f08410a..c03bf7d 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,111 @@ _No unreleased changes._ --- +## 26.15 — 2026-05-27 + +Per-subnet scan profiles (P2-05). Operators can now run aggressive +hourly deep scans on critical infrastructure while leaving the guest +network on a lazy daily liveness sweep — all from one config file, one +agent process. Closes the original P2 operator-feedback batch. + +### Added + +- **`config.SubnetProfile`** — per-subnet overrides for `ScanInterval`, + `Timeout`, `ProbePorts`, `DeepProbe`, `DeepProbePorts`, `UDPPorts`, + `EnrichARP`. Bool fields are `*bool` so a profile can explicitly + disable deep probing even when the global default is on (the zero + value would be ambiguous). +- **`config.ScannerConfig.Profiles []SubnetProfile`** — the new + per-subnet list. Mutually exclusive with the legacy `Subnets` + field; both are validated at boot. +- **`config.ResolvedProfile` + `ScannerConfig.Resolve()`** — flattens + Subnets + Profiles into one fully-defaulted list. Every field is + populated from either the profile override or the global default, + so the agent's runtime path has no further fallback logic. +- **`scanner.SubnetOptions`** — new struct passed to `Scan(ctx, + subnet, opts)`. Carries the per-call probe configuration so the + scanner can serve multiple profiles without per-scan reconstruction. +- **`config.True()` / `config.False()`** — bool-pointer helpers for + building profiles programmatically. + +### Changed + +- **`scanner.Scan(ctx, subnet)` → `scanner.Scan(ctx, subnet, SubnetOptions{})`**. + In-tree callers updated; out-of-tree callers pass `SubnetOptions{}` + to retain pre-26.15 behaviour. +- **`scanner.probe`, `deepScan`, `udpScan` helpers** now take their + timeout + port-list parameters explicitly rather than reading from + the Scanner struct. The Scanner-level fields remain as defaults + consulted by `resolve()` at the top of Scan. 
+- **`agent.New` now returns `(*Agent, error)`** — `Resolve()` runs at + construction so config errors (duplicate subnets, mutually-exclusive + flat list + profiles) surface at boot, not on the first scan tick. + Test suites updated. +- **Agent scheduling**: the single global `ScanInterval` ticker is + replaced with one that ticks at the *shortest* per-profile + interval. Each profile keeps its own `nextDue` timestamp; only + profiles past their due time get scanned on each tick. The + housekeeping pass (prune, change-detect diff, tracker updates) runs + every tick regardless, so zero-profile deployments — watchdog-only + mode — still work as before. +- **Tick-interval safety floor** of 1 second. Prevents pathological + busy-loops if an operator types `"1ns"` by accident. + +### Tests + +- 7 new tests in `internal/config` covering: legacy-Subnets path, + profile-overrides-win, explicit-False-beats-global-True, mutually- + exclusive validation, duplicate-subnet rejection, empty-subnet + rejection, zero-profile happy path. +- Scanner + agent test suites updated for the new `SubnetOptions{}` + third argument and `(a, err)` constructor. + +### Migration notes + +Existing configs keep working — set `scanner.subnets` and the global +fields (`scan_interval`, `timeout`, `probe_ports`, …) as before. +**Operators who want per-subnet tuning** switch to: + +```json +{ + "scanner": { + "profiles": [ + { "subnet": "10.0.0.0/24", "scan_interval": "1h", "deep_probe": true }, + { "subnet": "192.168.1.0/24", "scan_interval": "24h" } + ], + "scan_interval": "5m", + "timeout": "2s", + "workers": 50 + } +} +``` + +Any field absent from a profile inherits the corresponding global. +The flat `scanner.subnets` and per-subnet `scanner.profiles` fields +are mutually exclusive — boot fails fast if both are set. 
+ +--- + +## P2 operator-feedback batch — complete + +Original asks from the operator pass: **all five shipped.** + +| # | Item | Sprint | +|---|---|---| +| 1 | Service / application discovery | 26.12 | +| 2 | Change detection + webhook/syslog alerts | 26.13 | +| 3 | Device-type classifier | 26.11 | +| 4 | Query API beyond bulk export | 26.14 | +| 5 | Per-subnet scan profiles | 26.15 | + +The agent now does end-to-end inventory: discovery → enrichment → +classification → change detection → alerting → queryable API, with +per-subnet scheduling. Next-feature backlog is empty; future work +should be driven by a fresh round of operator feedback or `/ultrareview` +findings. + +--- + ## 26.14 — 2026-05-27 JSON query API (P2-04). Adds filterable, paginated `/api/v1/hosts` and diff --git a/cmd/internal/runtime/runtime.go b/cmd/internal/runtime/runtime.go index 80eb56c..ce77024 100644 --- a/cmd/internal/runtime/runtime.go +++ b/cmd/internal/runtime/runtime.go @@ -138,7 +138,11 @@ func Run(opts Options) int { slog.Info("alert sinks configured", "count", len(alertSinks)) } - a := agent.New(opts.Name, cfg.Scanner, db.Hosts(), db.Ports(), db.Scans(), tracker, mux) + a, err := agent.New(opts.Name, cfg.Scanner, db.Hosts(), db.Ports(), db.Scans(), tracker, mux) + if err != nil { + slog.Error("agent setup failed", "err", err) + return 1 + } adminSrv, err := admin.NewServer( cfg.Admin.Addr, opts.Name, diff --git a/internal/admin/api.go b/internal/admin/api.go index 99d685f..8eeb954 100644 --- a/internal/admin/api.go +++ b/internal/admin/api.go @@ -163,9 +163,9 @@ func (s *Server) handleAPIHostDetail(w http.ResponseWriter, r *http.Request) { type hostFilter struct { vendor string deviceType string - hostname string // lowercase, for case-insensitive substring + hostname string // lowercase, for case-insensitive substring subnet *net.IPNet // nil = no subnet filter - port int // 0 = no port filter + port int // 0 = no port filter } func parseHostFilter(q url.Values) (hostFilter, 
error) { diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 9efc6d4..32dd4a3 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -27,6 +27,12 @@ type Agent struct { alerts alerts.Emitter now func() time.Time + // profiles is the resolved per-subnet config built at New time. + // Each entry carries its own ScanInterval; nextDue tracks when its + // next scan is permitted. + profiles []config.ResolvedProfile + nextDue map[string]time.Time + // trigger is a buffered channel that lets external callers // (e.g. POST /scan) force an immediate cycle without waiting for the // next ticker firing. Capacity 1 so concurrent triggers coalesce. @@ -46,10 +52,14 @@ func New( scans store.ScanStore, tracker *health.Tracker, alertEmitter alerts.Emitter, -) *Agent { +) (*Agent, error) { if alertEmitter == nil { alertEmitter = alerts.NoopEmitter() } + profiles, err := cfg.Resolve() + if err != nil { + return nil, err + } return &Agent{ name: name, cfg: cfg, @@ -67,11 +77,13 @@ func New( UDPPorts: cfg.UDPPorts, EnrichARP: cfg.EnrichARP, }), - tracker: tracker, - alerts: alertEmitter, - now: time.Now, - trigger: make(chan struct{}, 1), - } + tracker: tracker, + alerts: alertEmitter, + now: time.Now, + profiles: profiles, + nextDue: make(map[string]time.Time, len(profiles)), + trigger: make(chan struct{}, 1), + }, nil } // Trigger requests an out-of-cycle scan. Returns true if the request was @@ -88,15 +100,20 @@ func (a *Agent) Trigger() bool { } } -// Run starts the scan loop. It executes one scan immediately, then repeats on -// cfg.ScanInterval. It blocks until ctx is cancelled. +// Run starts the scan loop. It executes one cycle immediately (every +// profile, regardless of its own interval), then ticks at the shortest +// configured per-profile interval. Each subsequent tick only scans +// profiles whose next-due time has passed. Blocks until ctx is cancelled. 
func (a *Agent) Run(ctx context.Context) {
 	log := slog.With("agent", a.name)
-	log.Info("scan loop started", "subnets", a.cfg.Subnets, "interval", a.cfg.ScanInterval)
+	log.Info("scan loop started",
+		"profiles", len(a.profiles),
+		"tick_interval", a.tickInterval(),
+	)
 
-	a.runCycle(ctx, log)
+	a.runCycle(ctx, log, true /* forceAll */)
 
-	ticker := time.NewTicker(a.cfg.ScanInterval.Duration)
+	ticker := time.NewTicker(a.tickInterval())
 	defer ticker.Stop()
 
 	for {
@@ -105,39 +122,82 @@ func (a *Agent) Run(ctx context.Context) {
 			log.Info("scan loop stopped")
 			return
 		case <-ticker.C:
-			a.runCycle(ctx, log)
+			a.runCycle(ctx, log, false)
 		case <-a.trigger:
 			log.Info("on-demand scan triggered")
-			a.runCycle(ctx, log)
+			a.runCycle(ctx, log, true)
+		}
+	}
+}
+
+// tickInterval is the shortest positive per-profile interval, or the
+// global ScanInterval when no profile supplies one (watchdog-only mode,
+// so housekeeping keeps its pre-profile cadence). A one-second floor
+// prevents busy-loops when an operator types "1ns" by accident.
+func (a *Agent) tickInterval() time.Duration {
+	const floor = time.Second
+	shortest := time.Duration(0)
+	for _, p := range a.profiles {
+		if p.ScanInterval <= 0 {
+			continue
+		}
+		if shortest == 0 || p.ScanInterval < shortest {
+			shortest = p.ScanInterval
 		}
 	}
+	if shortest == 0 {
+		shortest = a.cfg.ScanInterval.Duration // no profile sets a cadence
+	}
+	return max(shortest, floor)
 }
 
-func (a *Agent) runCycle(ctx context.Context, log *slog.Logger) {
-	log.Info("scan cycle started", "subnets", len(a.cfg.Subnets))
+// runCycle scans every profile whose nextDue has passed (or all of them
+// when forceAll is true — used for the initial cycle and on-demand
+// triggers).
+func (a *Agent) runCycle(ctx context.Context, log *slog.Logger, forceAll bool) {
 	started := a.now()
 
 	// Snapshot the pre-cycle host inventory so we can diff it against
 	// the post-cycle list and fire HostDiscovered / HostVanished events.
- // Snapshotting before the scan (rather than tracking what the - // scanner returned) means the diff correctly reflects "ground truth - // changed", including hosts the operator added or removed - // externally. prevHosts := snapshotByIP(ctx, a.hosts, log) + // Select due profiles. With no profiles configured (zero-config + // "watchdog only" deployment), due is empty but housekeeping below + // — prune, diff, tracker updates — still runs. + due := make([]config.ResolvedProfile, 0, len(a.profiles)) + for _, p := range a.profiles { + if forceAll || !started.Before(a.nextDue[p.Subnet]) { + due = append(due, p) + } + } + + if len(due) > 0 { + log.Info("scan cycle started", "due_profiles", len(due), "total_profiles", len(a.profiles)) + } + cycleHosts := 0 cycleHealthy := true - for _, subnet := range a.cfg.Subnets { + for _, p := range due { metrics.ScansTotal.Inc() - n, err := a.scanner.Scan(ctx, subnet) + n, err := a.scanner.Scan(ctx, p.Subnet, scanner.SubnetOptions{ + Timeout: p.Timeout, + ProbePorts: p.ProbePorts, + DeepProbe: boolPtr(p.DeepProbe), + DeepProbePorts: p.DeepProbePorts, + UDPPorts: p.UDPPorts, + EnrichARP: boolPtr(p.EnrichARP), + }) if err != nil { metrics.ScanErrorsTotal.Inc() - log.Warn("subnet scan failed", "subnet", subnet, "err", err) + log.Warn("subnet scan failed", "subnet", p.Subnet, "err", err) cycleHealthy = false continue } - log.Debug("subnet scanned", "subnet", subnet, "hosts", n) + log.Debug("subnet scanned", "subnet", p.Subnet, "hosts", n, "interval", p.ScanInterval) cycleHosts += n + // Schedule the next due time. Computed off the start of THIS + // cycle (not now) so a slow scan doesn't drift the cadence. 
+ a.nextDue[p.Subnet] = started.Add(p.ScanInterval) } if pruned := a.pruneStale(ctx, log, started); pruned > 0 { @@ -167,11 +227,13 @@ func (a *Agent) runCycle(ctx context.Context, log *slog.Logger) { a.tracker.SetHealthy(cycleHealthy) duration := a.now().Sub(started) - interval := a.cfg.ScanInterval.Duration + // Warning threshold: half the tick interval. Slower than that and + // the loop is at risk of dropped firings. + interval := a.tickInterval() if interval > 0 && duration > interval/2 { - log.Warn("scan cycle nearly exceeded interval", + log.Warn("scan cycle nearly exceeded tick interval", "duration", duration.Round(time.Millisecond), - "interval", interval, + "tick_interval", interval, ) } log.Info("scan cycle complete", @@ -182,6 +244,12 @@ func (a *Agent) runCycle(ctx context.Context, log *slog.Logger) { ) } +// boolPtr is a small inline helper for passing a value bool to a *bool +// option field. Pointer-bools let a profile distinguish "explicit false" +// from "inherit default"; from the resolved-profile side we know which +// way the bool was already resolved, so we always pass a non-nil pointer. +func boolPtr(b bool) *bool { return &b } + // pruneStale deletes hosts whose last_seen is older than the configured // HostTTL. Returns the number of hosts pruned. Disabled when HostTTL is 0 // (the default), so existing deployments don't lose history silently. diff --git a/internal/agent/agent_test.go b/internal/agent/agent_test.go index 37ddea9..48b3440 100644 --- a/internal/agent/agent_test.go +++ b/internal/agent/agent_test.go @@ -176,7 +176,7 @@ func (m *mockScanStore) List(_ context.Context) ([]*models.Scan, error) { // call to Trigger() succeeds, subsequent calls return false until the queued // trigger has been consumed. 
func TestAgent_TriggerCoalesces(t *testing.T) { - a := agent.New( + a, err := agent.New( "test", config.ScannerConfig{}, newMockHostStore(), @@ -185,6 +185,7 @@ func TestAgent_TriggerCoalesces(t *testing.T) { health.NewTracker("test"), nil, ) + require.NoError(t, err) assert.True(t, a.Trigger(), "first Trigger() must enqueue") assert.False(t, a.Trigger(), "second Trigger() must coalesce, not enqueue") } @@ -195,7 +196,7 @@ func TestAgent_CycleMarksHealthyOnCleanRun(t *testing.T) { tracker := health.NewTracker("test") tracker.SetHealthy(false) // start unhealthy so we can observe the flip - a := agent.New( + a, err := agent.New( "test", config.ScannerConfig{ Subnets: nil, @@ -207,6 +208,7 @@ func TestAgent_CycleMarksHealthyOnCleanRun(t *testing.T) { tracker, nil, ) + require.NoError(t, err) ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) defer cancel() @@ -224,7 +226,7 @@ func TestAgent_CycleMarksUnhealthyOnCountFailure(t *testing.T) { hosts.countErr = errors.New("db gone") tracker := health.NewTracker("test") - a := agent.New( + a, err := agent.New( "test", config.ScannerConfig{ Subnets: nil, @@ -236,6 +238,7 @@ func TestAgent_CycleMarksUnhealthyOnCountFailure(t *testing.T) { tracker, nil, ) + require.NoError(t, err) ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) defer cancel() @@ -260,7 +263,7 @@ func TestAgent_PrunesStaleHosts(t *testing.T) { require.NoError(t, err) tracker := health.NewTracker("test") - a := agent.New( + a, err := agent.New( "test", config.ScannerConfig{ Subnets: nil, @@ -273,6 +276,7 @@ func TestAgent_PrunesStaleHosts(t *testing.T) { tracker, nil, ) + require.NoError(t, err) ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) defer cancel() @@ -294,7 +298,7 @@ func TestAgent_PruneDisabledWithoutTTL(t *testing.T) { LastSeen: time.Now().Add(-24 * time.Hour), }) - a := agent.New( + a, err := agent.New( "test", config.ScannerConfig{ Subnets: nil, @@ -307,6 +311,7 
@@ func TestAgent_PruneDisabledWithoutTTL(t *testing.T) { health.NewTracker("test"), nil, ) + require.NoError(t, err) ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) defer cancel() @@ -330,7 +335,7 @@ func TestAgent_EmitsHostVanishedOnPrune(t *testing.T) { require.NoError(t, err) rec := &recordingEmitter{} - a := agent.New( + a, err := agent.New( "test", config.ScannerConfig{ Subnets: nil, @@ -343,6 +348,7 @@ func TestAgent_EmitsHostVanishedOnPrune(t *testing.T) { health.NewTracker("test"), rec, ) + require.NoError(t, err) ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) defer cancel() @@ -373,7 +379,7 @@ func TestAgent_EmitsHostDiscoveredOnNewHost(t *testing.T) { } rec := &recordingEmitter{} - a := agent.New( + a, err := agent.New( "test", config.ScannerConfig{ Subnets: nil, @@ -385,6 +391,7 @@ func TestAgent_EmitsHostDiscoveredOnNewHost(t *testing.T) { health.NewTracker("test"), rec, ) + require.NoError(t, err) ctx, cancel := context.WithTimeout(context.Background(), 75*time.Millisecond) defer cancel() diff --git a/internal/config/config.go b/internal/config/config.go index f351964..04b89d2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -96,7 +96,17 @@ type DatabaseConfig struct { } type ScannerConfig struct { - Subnets []string `json:"subnets"` + // Subnets is the legacy flat list — every entry uses the global + // defaults below. Mutually exclusive with Profiles; setting both + // is a config error. + Subnets []string `json:"subnets,omitempty"` + // Profiles is the per-subnet override list. Any field a profile + // leaves empty falls back to the corresponding global default + // (ScanInterval, Timeout, ProbePorts, …). Operators tune critical + // infrastructure with an aggressive profile while leaving the + // guest network on the lazy default. 
+	Profiles []SubnetProfile `json:"profiles,omitempty"`
+
 	ScanInterval Duration `json:"scan_interval"`
 	Timeout      Duration `json:"timeout"`
 	// Workers is the global cap on concurrent probe goroutines across all
@@ -139,6 +149,142 @@ type ScannerConfig struct {
 	EnrichARP bool `json:"enrich_arp,omitempty"`
 }
 
+// SubnetProfile overrides the scanner defaults for one subnet. Each field
+// is optional; the zero value means "use the global ScannerConfig default".
+// The Subnet field is the only required entry — everything else inherits.
+type SubnetProfile struct {
+	// Subnet is the CIDR (required). Must be unique across profiles.
+	Subnet string `json:"subnet"`
+
+	// ScanInterval overrides ScannerConfig.ScanInterval. Use a long
+	// interval for guest networks, short for critical infra. Zero =
+	// inherit global.
+	ScanInterval Duration `json:"scan_interval,omitempty"`
+	// Timeout overrides ScannerConfig.Timeout. Slow links benefit from
+	// a longer per-dial budget. Zero = inherit global.
+	Timeout Duration `json:"timeout,omitempty"`
+	// ProbePorts overrides ScannerConfig.ProbePorts. nil = inherit
+	// global; an empty (non-nil) slice would mean "no liveness probe" —
+	// not supported, Resolve rejects the empty case.
+	ProbePorts []int `json:"probe_ports,omitempty"`
+	// DeepProbe overrides ScannerConfig.DeepProbe. Pointer so a profile
+	// can explicitly set it false while the global is true (otherwise
+	// the zero value would be ambiguous). Use config.True / False
+	// helpers below.
+	DeepProbe *bool `json:"deep_probe,omitempty"`
+	// DeepProbePorts overrides ScannerConfig.DeepProbePorts.
+	DeepProbePorts []int `json:"deep_probe_ports,omitempty"`
+	// UDPPorts overrides ScannerConfig.UDPPorts.
+	UDPPorts []int `json:"udp_ports,omitempty"`
+	// EnrichARP overrides ScannerConfig.EnrichARP. See DeepProbe re:
+	// pointer-bool.
+	EnrichARP *bool `json:"enrich_arp,omitempty"`
+}
+
+// True / False are bool-pointer helpers for SubnetProfile.DeepProbe and
+// EnrichARP. JSON unmarshalling handles the wire form; these are for Go
+// callers building profiles programmatically.
+func True() *bool  { v := true; return &v }
+func False() *bool { v := false; return &v }
+
+// ResolvedProfile is a fully-defaulted profile — every field is populated
+// from either the profile's own setting or the global ScannerConfig
+// fallback. Produced by ScannerConfig.Resolve(); consumed by the agent.
+type ResolvedProfile struct {
+	Subnet         string
+	ScanInterval   time.Duration
+	Timeout        time.Duration
+	ProbePorts     []int
+	DeepProbe      bool
+	DeepProbePorts []int
+	UDPPorts       []int
+	EnrichARP      bool
+}
+
+// Resolve flattens Subnets + Profiles into a unified list with every
+// field populated. The flat-Subnets form is preserved for backwards
+// compat: each entry becomes a profile with all overrides empty (so it
+// inherits every global default).
+//
+// Validation:
+//   - At most one of Subnets / Profiles may be set.
+//   - Profile.Subnet must be non-empty and unique (exact string match).
+//   - An explicitly empty Profile.ProbePorts is rejected.
+//
+// CIDR syntax is not parsed here; a malformed subnet surfaces as a
+// "parse CIDR" error from scanner.Scan the first time it is scanned.
+func (c *ScannerConfig) Resolve() ([]ResolvedProfile, error) {
+	if len(c.Subnets) > 0 && len(c.Profiles) > 0 {
+		return nil, fmt.Errorf("scanner.subnets and scanner.profiles are mutually exclusive — pick one")
+	}
+	var profiles []SubnetProfile
+	switch {
+	case len(c.Profiles) > 0:
+		profiles = c.Profiles
+	case len(c.Subnets) > 0:
+		profiles = make([]SubnetProfile, len(c.Subnets))
+		for i, s := range c.Subnets {
+			profiles[i] = SubnetProfile{Subnet: s}
+		}
+	}
+	seen := make(map[string]bool, len(profiles))
+	out := make([]ResolvedProfile, 0, len(profiles))
+	for _, p := range profiles {
+		if p.Subnet == "" {
+			return nil, fmt.Errorf("scanner profile missing subnet")
+		}
+		if seen[p.Subnet] {
+			return nil, fmt.Errorf("scanner profile subnet %q listed twice", p.Subnet)
+		}
+		if p.ProbePorts != nil && len(p.ProbePorts) == 0 {
+			return nil, fmt.Errorf("scanner profile %q: probe_ports must not be empty", p.Subnet)
+		}
+		seen[p.Subnet] = true
+		out = append(out, resolveProfile(p, c))
+	}
+	return out, nil
+}
+
+// resolveProfile merges one profile with the global defaults in c. Zero
+// durations, nil port slices and nil bool pointers inherit the global
+// value; anything the profile sets explicitly wins.
+func resolveProfile(p SubnetProfile, c *ScannerConfig) ResolvedProfile {
+	r := ResolvedProfile{
+		Subnet:         p.Subnet,
+		ScanInterval:   p.ScanInterval.Duration,
+		Timeout:        p.Timeout.Duration,
+		ProbePorts:     p.ProbePorts,
+		DeepProbePorts: p.DeepProbePorts,
+		UDPPorts:       p.UDPPorts,
+	}
+	if r.ScanInterval == 0 {
+		r.ScanInterval = c.ScanInterval.Duration
+	}
+	if r.Timeout == 0 {
+		r.Timeout = c.Timeout.Duration
+	}
+	if r.ProbePorts == nil {
+		r.ProbePorts = c.ProbePorts
+	}
+	if r.DeepProbePorts == nil {
+		r.DeepProbePorts = c.DeepProbePorts
+	}
+	if r.UDPPorts == nil {
+		r.UDPPorts = c.UDPPorts
+	}
+	if p.DeepProbe != nil {
+		r.DeepProbe = *p.DeepProbe
+	} else {
+		r.DeepProbe = c.DeepProbe
+	}
+	if p.EnrichARP != nil {
+		r.EnrichARP = *p.EnrichARP
+	} else {
+		r.EnrichARP = c.EnrichARP
+	}
+	return r
+}
+
 type LogConfig struct {
 	Level  string `json:"level"`  // debug | info | warn | error
 	Format string `json:"format"` // text | json
diff --git a/internal/config/profile_test.go b/internal/config/profile_test.go
new file mode 100644
index 0000000..d611a40
--- /dev/null
+++ b/internal/config/profile_test.go
@@ -0,0 +1,119 @@ +package config_test + +import ( + "testing" + "time" + + "github.com/Ronin48/NetworkInventoryAgent/internal/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestResolve_LegacyFlatSubnetsUseGlobals(t *testing.T) { + c := &config.ScannerConfig{ + Subnets: []string{"10.0.0.0/24", "192.168.1.0/24"}, + ScanInterval: config.Duration{Duration: 5 * time.Minute}, + Timeout: config.Duration{Duration: 2 * time.Second}, + ProbePorts: []int{22, 80, 443}, + DeepProbe: true, + EnrichARP: true, + } + got, err := c.Resolve() + require.NoError(t, err) + require.Len(t, got, 2) + for _, p := range got { + assert.Equal(t, 5*time.Minute, p.ScanInterval) + assert.Equal(t, 2*time.Second, p.Timeout) + assert.Equal(t, []int{22, 80, 443}, p.ProbePorts) + assert.True(t, p.DeepProbe, "global DeepProbe must propagate") + assert.True(t, p.EnrichARP, "global EnrichARP must propagate") + } +} + +func TestResolve_ProfileOverridesWin(t *testing.T) { + c := &config.ScannerConfig{ + Profiles: []config.SubnetProfile{ + { + Subnet: "10.0.0.0/24", + ScanInterval: config.Duration{Duration: time.Hour}, + ProbePorts: []int{443}, + DeepProbe: config.True(), + }, + { + Subnet: "192.168.1.0/24", + // Inherits everything from globals. + }, + }, + ScanInterval: config.Duration{Duration: 5 * time.Minute}, + Timeout: config.Duration{Duration: 2 * time.Second}, + ProbePorts: []int{22, 80, 443}, + DeepProbe: false, + } + got, err := c.Resolve() + require.NoError(t, err) + require.Len(t, got, 2) + + // First profile overrides. + assert.Equal(t, time.Hour, got[0].ScanInterval) + assert.Equal(t, []int{443}, got[0].ProbePorts) + assert.True(t, got[0].DeepProbe) + assert.Equal(t, 2*time.Second, got[0].Timeout, "Timeout inherits from global") + + // Second profile inherits everything. 
+ assert.Equal(t, 5*time.Minute, got[1].ScanInterval) + assert.Equal(t, []int{22, 80, 443}, got[1].ProbePorts) + assert.False(t, got[1].DeepProbe) +} + +func TestResolve_ExplicitFalseBeatsGlobalTrue(t *testing.T) { + c := &config.ScannerConfig{ + Profiles: []config.SubnetProfile{ + {Subnet: "10.0.0.0/24", DeepProbe: config.False()}, + }, + ScanInterval: config.Duration{Duration: 5 * time.Minute}, + DeepProbe: true, // global ON, profile must turn it OFF + } + got, err := c.Resolve() + require.NoError(t, err) + require.Len(t, got, 1) + assert.False(t, got[0].DeepProbe, "pointer-bool False() must override the global True") +} + +func TestResolve_SubnetsAndProfilesMutuallyExclusive(t *testing.T) { + c := &config.ScannerConfig{ + Subnets: []string{"10.0.0.0/24"}, + Profiles: []config.SubnetProfile{{Subnet: "192.168.1.0/24"}}, + } + _, err := c.Resolve() + require.Error(t, err) + assert.Contains(t, err.Error(), "mutually exclusive") +} + +func TestResolve_DuplicateSubnetRejected(t *testing.T) { + c := &config.ScannerConfig{ + Profiles: []config.SubnetProfile{ + {Subnet: "10.0.0.0/24"}, + {Subnet: "10.0.0.0/24"}, + }, + } + _, err := c.Resolve() + require.Error(t, err) + assert.Contains(t, err.Error(), "listed twice") +} + +func TestResolve_EmptySubnetRejected(t *testing.T) { + c := &config.ScannerConfig{ + Profiles: []config.SubnetProfile{{Subnet: ""}}, + } + _, err := c.Resolve() + require.Error(t, err) +} + +func TestResolve_NoSubnetsOrProfilesIsValid(t *testing.T) { + c := &config.ScannerConfig{ + ScanInterval: config.Duration{Duration: 5 * time.Minute}, + } + got, err := c.Resolve() + require.NoError(t, err) + assert.Empty(t, got, "no-config deployment is allowed (watchdog-only mode)") +} diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 6e34211..a553dc4 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -110,10 +110,86 @@ func New(opts Options) *Scanner { } } +// SubnetOptions overrides scanner-level defaults 
for one Scan call. Every +// field is optional; the zero value inherits the default the scanner was +// constructed with. Per-subnet profile configuration plumbs these values +// in from the agent so an operator can scan critical infrastructure +// aggressively while leaving a guest network on the lazy default. +type SubnetOptions struct { + // Timeout overrides Options.Timeout for every dial in this scan. 0 + // = inherit. + Timeout time.Duration + // ProbePorts overrides Options.ProbePorts. nil = inherit. + ProbePorts []int + // DeepProbe overrides Options.DeepProbe. Pointer-bool so a profile + // can explicitly disable deep probing even when the scanner-level + // default is on. nil = inherit. + DeepProbe *bool + // DeepProbePorts overrides Options.DeepProbePorts. nil = inherit. + DeepProbePorts []int + // UDPPorts overrides Options.UDPPorts. nil = inherit. + UDPPorts []int + // EnrichARP overrides Options.EnrichARP. See DeepProbe re: pointer-bool. + EnrichARP *bool +} + +// effectiveOpts is the per-Scan resolved view — every Scanner-level +// default merged with the per-call SubnetOptions. Built once at the top +// of Scan and passed by value to the per-host goroutine. Saves repeating +// the "if zero use default" check at every read site. 
+type effectiveOpts struct { + timeout time.Duration + probePorts []int + deepProbe bool + deepProbePorts []int + udpPorts []int + enrichARP bool +} + +func (s *Scanner) resolve(opts SubnetOptions) effectiveOpts { + e := effectiveOpts{ + timeout: opts.Timeout, + probePorts: opts.ProbePorts, + deepProbePorts: opts.DeepProbePorts, + udpPorts: opts.UDPPorts, + deepProbe: s.deepProbe, + enrichARP: s.enrichARP, + } + if e.timeout == 0 { + e.timeout = s.timeout + } + if e.probePorts == nil { + e.probePorts = s.probePorts + } + if e.deepProbePorts == nil { + e.deepProbePorts = s.deepProbePorts + } + if e.udpPorts == nil { + e.udpPorts = s.udpPorts + } + if opts.DeepProbe != nil { + e.deepProbe = *opts.DeepProbe + } + if opts.EnrichARP != nil { + e.enrichARP = *opts.EnrichARP + } + // When deep probing is enabled but no ports given (per-call OR + // per-Scanner), fall back to the default deep port set rather than + // no-op. + if e.deepProbe && len(e.deepProbePorts) == 0 { + e.deepProbePorts = defaultDeepProbePorts + } + return e +} + // Scan probes every host in subnet (CIDR notation) for open TCP ports and // records each live host in the inventory. It returns the number of live hosts // found. The scan record in the DB is updated when the scan finishes. -func (s *Scanner) Scan(ctx context.Context, subnet string) (int, error) { +// +// Pass SubnetOptions{} to use the scanner-level defaults (this is the +// pre-26.15 behaviour). Per-subnet profiles populate fields to override. 
+func (s *Scanner) Scan(ctx context.Context, subnet string, opts SubnetOptions) (int, error) { + eo := s.resolve(opts) _, ipNet, err := net.ParseCIDR(subnet) if err != nil { return 0, fmt.Errorf("parse CIDR %q: %w", subnet, err) @@ -157,7 +233,7 @@ func (s *Scanner) Scan(ctx context.Context, subnet string) (int, error) { wg.Add(1) go func(addr string) { defer func() { <-s.sem; wg.Done() }() - openPort, ok := s.probe(ctx, addr) + openPort, ok := s.probe(ctx, addr, eo.timeout, eo.probePorts) if !ok { metrics.ProbeFailureTotal.Inc() return @@ -169,10 +245,10 @@ func (s *Scanner) Scan(ctx context.Context, subnet string) (int, error) { FirstSeen: startedAt, LastSeen: startedAt, } - if fp := fingerprint(ctx, addr, openPort, s.timeout); fp != "" { + if fp := fingerprint(ctx, addr, openPort, eo.timeout); fp != "" { host.OSFingerprint = fp } - if s.enrichARP { + if eo.enrichARP { if mac, vendor := lookupARP(addr); mac != "" { host.MACAddress = mac host.Vendor = vendor @@ -199,11 +275,11 @@ func (s *Scanner) Scan(ctx context.Context, subnet string) (int, error) { livenessService := host.OSFingerprint s.upsertPort(ctx, hostID, addr, openPort, models.TCP, models.StateOpen, livenessService, startedAt) } - if s.deepProbe && s.ports != nil { - openTCP = append(openTCP, s.deepScan(ctx, hostID, addr, openPort, startedAt)...) + if eo.deepProbe && s.ports != nil { + openTCP = append(openTCP, s.deepScan(ctx, hostID, addr, openPort, startedAt, eo.timeout, eo.deepProbePorts)...) } - if len(s.udpPorts) > 0 && s.ports != nil { - openUDP = s.udpScan(ctx, hostID, addr, startedAt) + if len(eo.udpPorts) > 0 && s.ports != nil { + openUDP = s.udpScan(ctx, hostID, addr, startedAt, eo.timeout, eo.udpPorts) } // Classify now that every probe stage has reported. 
A @@ -232,23 +308,27 @@ func (s *Scanner) Scan(ctx context.Context, subnet string) (int, error) { return count, nil } -// probe dials every configured probe port concurrently and returns the first -// port that answers, or (0, false) if all dials fail within s.timeout. -// Concurrent fan-out collapses worst-case latency from len(probePorts)*timeout -// to ~1*timeout for dead hosts — the original sequential probe could keep a +// probe dials every probe port concurrently and returns the first port +// that answers, or (0, false) if all dials fail within timeout. Concurrent +// fan-out collapses worst-case latency from len(probePorts)*timeout to +// ~1*timeout for dead hosts — the original sequential probe could keep a // /24 sweep running longer than the scan interval. -func (s *Scanner) probe(ctx context.Context, ip string) (int, bool) { - dialCtx, cancel := context.WithTimeout(ctx, s.timeout) +// +// Timeout and probePorts are passed by the caller (rather than read from +// Scanner state) so per-subnet profile overrides flow through without +// mutating the shared Scanner. 
+func (s *Scanner) probe(ctx context.Context, ip string, timeout time.Duration, probePorts []int) (int, bool) { + dialCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() type result struct { port int ok bool } - results := make(chan result, len(s.probePorts)) - d := net.Dialer{Timeout: s.timeout} + results := make(chan result, len(probePorts)) + d := net.Dialer{Timeout: timeout} - for _, port := range s.probePorts { + for _, port := range probePorts { go func(port int) { conn, err := d.DialContext(dialCtx, "tcp", net.JoinHostPort(ip, strconv.Itoa(port))) if err != nil { @@ -261,7 +341,7 @@ func (s *Scanner) probe(ctx context.Context, ip string) (int, bool) { } var firstOpen int - for range s.probePorts { + for range probePorts { r := <-results if r.ok && firstOpen == 0 { firstOpen = r.port @@ -294,21 +374,19 @@ func (s *Scanner) upsertPort(ctx context.Context, hostID int64, ip string, port metrics.PortsUpsertedTotal.Inc() } -// deepScan dials each port in s.deepProbePorts (skipping the one already +// deepScan dials each port in deepProbePorts (skipping the one already // confirmed by the liveness probe), persists every successful dial, and // returns the list of newly-open ports so the classifier can see the full -// picture. The fan-out shares the global sem so deep probing does not blow -// past the configured Workers budget. Closed/filtered ports are -// intentionally NOT recorded — the ports table is a positive log of what's -// open, not an inverse-index of what isn't. -func (s *Scanner) deepScan(ctx context.Context, hostID int64, ip string, knownOpen int, ts time.Time) []int { - d := net.Dialer{Timeout: s.timeout} +// picture. timeout + deepProbePorts are caller-supplied so per-subnet +// profiles override. 
+func (s *Scanner) deepScan(ctx context.Context, hostID int64, ip string, knownOpen int, ts time.Time, timeout time.Duration, deepProbePorts []int) []int { + d := net.Dialer{Timeout: timeout} var ( mu sync.Mutex out []int wg sync.WaitGroup ) - for _, port := range s.deepProbePorts { + for _, port := range deepProbePorts { if port == knownOpen { continue } @@ -330,7 +408,7 @@ func (s *Scanner) deepScan(ctx context.Context, hostID int64, ip string, knownOp // the server (or client) to speak first — easier to // just redial inside fingerprint() than juggle a half- // initialised socket here. - service := fingerprint(ctx, ip, port, s.timeout) + service := fingerprint(ctx, ip, port, timeout) s.upsertPort(ctx, hostID, ip, port, models.TCP, models.StateOpen, service, ts) mu.Lock() out = append(out, port) @@ -341,23 +419,24 @@ func (s *Scanner) deepScan(ctx context.Context, hostID int64, ip string, knownOp return out } -// udpScan tries each UDP port in s.udpPorts and returns the list of UDP +// udpScan tries each UDP port in udpPorts and returns the list of UDP // ports that came back open (state=Open only — Closed responses are // persisted but excluded from the return so the classifier reasons about -// services, not negative observations). +// services, not negative observations). timeout + udpPorts are caller- +// supplied so per-subnet profiles override. // // Best-effort semantics: // - any bytes read back → Open // - connection-refused (Linux surfaces ICMP port-unreachable this way) → Closed // - anything else (no reply within timeout) → not recorded, since the // ambiguous case would otherwise dominate the ports table. 
-func (s *Scanner) udpScan(ctx context.Context, hostID int64, ip string, ts time.Time) []int { +func (s *Scanner) udpScan(ctx context.Context, hostID int64, ip string, ts time.Time, timeout time.Duration, udpPorts []int) []int { var ( mu sync.Mutex out []int wg sync.WaitGroup ) - for _, port := range s.udpPorts { + for _, port := range udpPorts { select { case s.sem <- struct{}{}: case <-ctx.Done(): @@ -366,7 +445,7 @@ func (s *Scanner) udpScan(ctx context.Context, hostID int64, ip string, ts time. wg.Add(1) go func(port int) { defer func() { <-s.sem; wg.Done() }() - state, ok := probeUDP(ctx, ip, port, s.timeout) + state, ok := probeUDP(ctx, ip, port, timeout) if !ok { return } diff --git a/internal/scanner/scanner_test.go b/internal/scanner/scanner_test.go index cc6ca75..f6f30b2 100644 --- a/internal/scanner/scanner_test.go +++ b/internal/scanner/scanner_test.go @@ -202,7 +202,7 @@ func newScanner(hosts *mockHostStore, scans *mockScanStore) *scanner.Scanner { func TestScanner_Scan_InvalidCIDR(t *testing.T) { s := newScanner(newMockHostStore(), newMockScanStore()) - _, err := s.Scan(t.Context(), "not-a-cidr") + _, err := s.Scan(t.Context(), "not-a-cidr", scanner.SubnetOptions{}) require.Error(t, err) assert.Contains(t, err.Error(), "parse CIDR") } @@ -216,7 +216,7 @@ func TestScanner_Scan_MaxHostsGuard(t *testing.T) { Workers: 4, MaxHosts: 5, }) - _, err := s.Scan(t.Context(), "192.168.1.0/24") + _, err := s.Scan(t.Context(), "192.168.1.0/24", scanner.SubnetOptions{}) require.Error(t, err) assert.Contains(t, err.Error(), "exceeds limit") } @@ -227,7 +227,7 @@ func TestScanner_Scan_ContextCancelled_CompletesGracefully(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) cancel() // already cancelled before the scan starts - _, err := s.Scan(ctx, "192.168.1.0/30") + _, err := s.Scan(ctx, "192.168.1.0/30", scanner.SubnetOptions{}) // A pre-cancelled context should not produce an error — the scan // exits early and the scan record is still finished normally. 
require.NoError(t, err) @@ -240,7 +240,7 @@ func TestScanner_Scan_CreatesAndFinishesScanRecord(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) cancel() // cancel so no probing actually happens - _, err := s.Scan(ctx, "192.168.1.0/30") + _, err := s.Scan(ctx, "192.168.1.0/30", scanner.SubnetOptions{}) require.NoError(t, err) scans.mu.Lock() @@ -284,7 +284,7 @@ func TestScanner_Scan_PersistsOpenPort(t *testing.T) { MaxHosts: 65535, }) - n, err := s.Scan(t.Context(), "127.0.0.1/32") + n, err := s.Scan(t.Context(), "127.0.0.1/32", scanner.SubnetOptions{}) require.NoError(t, err) if n == 0 { t.Skip("no probe port answered on 127.0.0.1; cannot exercise port persistence") @@ -341,7 +341,7 @@ func TestScanner_Scan_DeepProbePersistsExtraOpenPorts(t *testing.T) { DeepProbePorts: []int{livePort, deepPort}, // livePort skipped (already known open) }) - n, err := s.Scan(t.Context(), "127.0.0.1/32") + n, err := s.Scan(t.Context(), "127.0.0.1/32", scanner.SubnetOptions{}) require.NoError(t, err) require.Equal(t, 1, n) @@ -393,7 +393,7 @@ func TestScanner_Scan_PopulatesDeviceType(t *testing.T) { ProbePorts: []int{11211}, }) - n, err := s.Scan(t.Context(), "127.0.0.1/32") + n, err := s.Scan(t.Context(), "127.0.0.1/32", scanner.SubnetOptions{}) require.NoError(t, err) require.Equal(t, 1, n) @@ -417,7 +417,7 @@ func TestScanner_Scan_DoesNotProbeNetworkOrBroadcast(t *testing.T) { ctx, cancel := context.WithCancel(t.Context()) cancel() - _, err := s.Scan(ctx, "10.0.0.0/30") + _, err := s.Scan(ctx, "10.0.0.0/30", scanner.SubnetOptions{}) require.NoError(t, err) // With a pre-cancelled context nothing is probed, but verify we stored