Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,53 @@ _No unreleased changes._

---

## 26.11 — 2026-05-27

Device-type classifier (P2-03 from the operator-feedback queue). Populates
`Host.DeviceType` automatically based on the OUI vendor, OS-fingerprint
banner, and the open TCP + UDP ports found during the scan. The admin
console templates already had the field plumbed; this sprint just gives
them data to show.

### Added

- **`internal/scanner/classify.go`** — pure heuristic function with
rules for printers (9100/631/515), routers (MikroTik + vendor-pinned
Cisco), hypervisors (VMware + 902/5988/5989), databases (MySQL,
Postgres, MSSQL, MongoDB, Redis, Memcached), Windows hosts/servers
(SMB ± HTTP), Active Directory domain controllers (Kerberos+LDAP),
mail servers (SMTP+IMAP combo), DNS servers, MQTT brokers, embedded
systems (Raspberry Pi OUI), Linux hosts (SSH banner), and generic
web appliances. Conservative: returns "" rather than guessing when
no rule fires confidently.
- **`internal/scanner/classify_test.go`** — 27 table-driven cases
covering every rule above, including overlapping cases (DC beats
generic SMB, SMB+webserver beats SMB alone).
- **`scanner.Scan` integration test** — confirms a host listening on
11211 (memcached) gets `DeviceType = "database (memcached)"` after a
real scan, exercising the full classify → re-upsert path.

### Changed

- **`scanner.deepScan` and `scanner.udpScan`** now return the list of
open ports they found (in addition to upserting them) so the
per-host goroutine can pass the complete port set to `classify()`.
No behavioural change for callers that ignore the return value.
- **Per-host scan path** does a second `hosts.Upsert` when the
classifier produces a non-empty device-type. The cost is one extra
small SQL write per live host per cycle.

### Fixed

- **`mockHostStore.Upsert` now mirrors the sqlite UPSERT** — it
previously returned the existing row unchanged on conflict, which
meant the scanner's "first upsert without device_type, then
re-upsert with device_type" path tested green against the mock but
would have lost the device_type write against any real store. Mock
parity caught the bug before it shipped.

---

## 26.10 — 2026-05-27

Container distribution. Adds multi-arch Docker images on the GitHub
Expand Down
153 changes: 153 additions & 0 deletions internal/scanner/classify.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package scanner

import (
"strings"
)

// classify returns a coarse device-type label for a host given its OUI
// vendor (from ARP), its OS fingerprint banner, and the TCP + UDP ports
// that came back open during the scan. Returns "" if no rule matches —
// the admin UI hides empty device-type cells rather than guessing.
//
// vendor and osfp are matched case-insensitively. tcp and udp may be in
// any order and may contain duplicates; only the set of distinct ports
// influences the result.
//
// Ordering matters: rules are evaluated top to bottom and the first
// match wins, so more-specific rules are placed first. The classifier
// is deliberately conservative — we'd rather say "" than mislabel an
// asset (a wrong "printer" tag on a server is worse than no tag).
func classify(vendor, osfp string, tcp, udp []int) string {
	tcpSet := indexSet(tcp)
	udpSet := indexSet(udp)
	vlow := strings.ToLower(vendor)
	osfpLow := strings.ToLower(osfp)

	// ── Print servers ─────────────────────────────────────────────
	// JetDirect raw-print 9100, IPP 631, LPD 515 — any one of these
	// is treated as sufficient on its own.
	if tcpSet[9100] || tcpSet[631] || tcpSet[515] {
		return "printer"
	}

	// ── Network infrastructure ────────────────────────────────────
	// MikroTik: either the OUI vendor string or one of the RouterOS
	// API ports (8728/8729) is enough by itself.
	if strings.Contains(vlow, "mikrotik") || tcpSet[8728] || tcpSet[8729] {
		return "router"
	}
	// Cisco: the vendor alone is not enough; it must be paired with a
	// management surface (SSH, Telnet, HTTP or HTTPS).
	if strings.Contains(vlow, "cisco") && (tcpSet[22] || tcpSet[23] || tcpSet[80] || tcpSet[443]) {
		return "router"
	}

	// ── Hypervisors ───────────────────────────────────────────────
	// Requires the VMware vendor string AND one of 902 (vsphere SDK)
	// or 5988/5989 (CIM); neither half matches alone.
	if strings.Contains(vlow, "vmware") && (tcpSet[902] || tcpSet[5988] || tcpSet[5989]) {
		return "hypervisor"
	}

	// ── Database servers ──────────────────────────────────────────
	// Database ports are treated as strong signals on their own:
	// 3306, 5432, 1433, 27017, 6379, 11211. When several are open the
	// first case below wins.
	switch {
	case tcpSet[3306]:
		return "database (mysql)"
	case tcpSet[5432]:
		return "database (postgres)"
	case tcpSet[1433]:
		return "database (mssql)"
	case tcpSet[27017]:
		return "database (mongodb)"
	case tcpSet[6379]:
		return "database (redis)"
	case tcpSet[11211]:
		return "database (memcached)"
	}

	// ── Active Directory / Windows DC ─────────────────────────────
	// Kerberos (88) + LDAP (389) is the signature; SMB and DNS
	// usually ride along but aren't required. Must fire BEFORE the
	// generic SMB rule below so a DC isn't mislabelled as a
	// plain windows-host.
	if tcpSet[88] && tcpSet[389] {
		return "windows-dc"
	}

	// ── Windows hosts ─────────────────────────────────────────────
	// SMB (445) or WinRM (5985/5986) marks the host as Windows. It is
	// labelled a server when it also exposes HTTP(S) or SMTP (25),
	// and a plain host otherwise.
	if tcpSet[445] || tcpSet[5985] || tcpSet[5986] {
		if tcpSet[80] || tcpSet[443] || tcpSet[25] {
			return "windows-server"
		}
		return "windows-host"
	}
	// RDP alone (without SMB) is often a Windows VM with the
	// firewall blocking SMB — still a Windows host.
	if tcpSet[3389] {
		return "windows-host"
	}

	// ── Mail servers ──────────────────────────────────────────────
	// Requires an SMTP port (25/465/587) together with a mailbox
	// access port: IMAP (143/993) or POP3 (110/995). SMTP alone does
	// not match.
	if (tcpSet[25] || tcpSet[465] || tcpSet[587]) &&
		(tcpSet[143] || tcpSet[993] || tcpSet[110] || tcpSet[995]) {
		return "mail-server"
	}

	// ── DNS servers ───────────────────────────────────────────────
	// DNS on TCP/53 OR UDP/53. A full DC (88 + 389) has already
	// returned above; this guard additionally withholds the label
	// when either Kerberos (88) or LDAP (389) is open on its own.
	if (tcpSet[53] || udpSet[53]) && !tcpSet[88] && !tcpSet[389] {
		return "dns-server"
	}

	// ── IoT / embedded ────────────────────────────────────────────
	// MQTT brokers (1883/8883) are clear IoT signals. A Raspberry Pi
	// OUI that reached this point (no earlier rule claimed the host)
	// is labelled embedded regardless of which ports are open.
	if tcpSet[1883] || tcpSet[8883] {
		return "iot-broker"
	}
	if strings.Contains(vlow, "raspberry pi") {
		return "embedded"
	}

	// ── SSH-banner-driven OS hints ────────────────────────────────
	// fingerprint() records the SSH greeting as the OS fingerprint
	// when port 22 answered. OpenSSH on Linux is the bulk of these.
	// Any OpenSSH banner mentioning "windows" is kept out of the
	// linux-host case; only the OpenSSH_for_Windows build is labelled
	// a Windows host.
	if strings.HasPrefix(osfpLow, "ssh-") {
		switch {
		case strings.Contains(osfpLow, "openssh") && !strings.Contains(osfpLow, "windows"):
			return "linux-host"
		case strings.Contains(osfpLow, "openssh_for_windows"):
			return "windows-host"
		}
	}

	// ── Generic web server ────────────────────────────────────────
	// HTTP(S) without any host-shell port (no 22/23/3389/445) is
	// most often an appliance or load balancer.
	if (tcpSet[80] || tcpSet[443] || tcpSet[8080] || tcpSet[8443]) &&
		!tcpSet[22] && !tcpSet[23] && !tcpSet[3389] && !tcpSet[445] {
		return "appliance"
	}

	// ── Generic Linux host (SSH only) ─────────────────────────────
	// SSH plus at most two other open TCP ports. Count distinct ports
	// (the set), not raw slice entries, so a port list containing
	// repeats is not mistaken for a busier host.
	if tcpSet[22] && len(tcpSet) <= 3 {
		return "linux-host"
	}

	return ""
}

// indexSet turns a port list into a membership map keyed by port
// number, so call sites can write set[port] instead of scanning the
// slice. Duplicate ports collapse into a single key. An empty or nil
// input yields a nil map, which is safe to index and reports false for
// every port.
func indexSet(ports []int) map[int]bool {
	var set map[int]bool
	for i := range ports {
		if set == nil {
			// Allocate lazily so the empty case stays nil.
			set = make(map[int]bool, len(ports))
		}
		set[ports[i]] = true
	}
	return set
}
164 changes: 164 additions & 0 deletions internal/scanner/classify_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package scanner

import "testing"

// TestClassify pins the label classify produces for each rule, for the
// overlapping cases where rule order decides the outcome, and for
// inputs that must stay unlabelled because only half of a combined
// signal is present.
func TestClassify(t *testing.T) {
	cases := []struct {
		name   string
		vendor string
		osfp   string
		tcp    []int
		udp    []int
		want   string
	}{
		{
			name: "printer JetDirect 9100",
			tcp:  []int{9100},
			want: "printer",
		},
		{
			name: "printer IPP 631 alone",
			tcp:  []int{631},
			want: "printer",
		},
		{
			name: "printer LPD 515 alone",
			tcp:  []int{515},
			want: "printer",
		},
		{
			name:   "mikrotik router by vendor",
			vendor: "Routerboard.com (MikroTik)",
			tcp:    []int{22, 80, 443},
			want:   "router",
		},
		{
			name: "mikrotik router by API port even without vendor",
			tcp:  []int{8728},
			want: "router",
		},
		{
			name:   "cisco router",
			vendor: "Cisco",
			tcp:    []int{22, 80, 443},
			want:   "router",
		},
		{
			name:   "cisco vendor without a management port is not a router",
			vendor: "Cisco Systems, Inc",
			tcp:    []int{5060},
			want:   "",
		},
		{
			name:   "esxi hypervisor",
			vendor: "VMware",
			tcp:    []int{22, 80, 443, 902, 5989},
			want:   "hypervisor",
		},
		{
			name:   "vmware vendor without hypervisor ports is not a hypervisor",
			vendor: "VMware, Inc.",
			tcp:    []int{22},
			want:   "linux-host",
		},
		{
			name: "mysql database",
			tcp:  []int{22, 3306},
			want: "database (mysql)",
		},
		{
			name: "postgres database",
			tcp:  []int{5432, 22},
			want: "database (postgres)",
		},
		{
			name: "mssql database",
			tcp:  []int{1433},
			want: "database (mssql)",
		},
		{
			name: "mongodb",
			tcp:  []int{27017},
			want: "database (mongodb)",
		},
		{
			name: "redis",
			tcp:  []int{6379},
			want: "database (redis)",
		},
		{
			name: "memcached",
			tcp:  []int{11211},
			want: "database (memcached)",
		},
		{
			name: "windows host with SMB only",
			tcp:  []int{445, 135, 139},
			want: "windows-host",
		},
		{
			name: "windows host with WinRM only",
			tcp:  []int{5985},
			want: "windows-host",
		},
		{
			name: "windows server (SMB+IIS)",
			tcp:  []int{445, 80, 443, 3389},
			want: "windows-server",
		},
		{
			name: "windows host with RDP only",
			tcp:  []int{3389},
			want: "windows-host",
		},
		{
			name: "mail server (smtp+imap)",
			tcp:  []int{25, 143, 993, 587},
			want: "mail-server",
		},
		{
			name: "mail server (smtp+pop3)",
			tcp:  []int{25, 110},
			want: "mail-server",
		},
		{
			name: "smtp alone is not labelled",
			tcp:  []int{25},
			want: "",
		},
		{
			name: "windows-dc with SMB still wins over generic windows",
			tcp:  []int{53, 88, 389, 445},
			want: "windows-dc",
		},
		{
			name: "active directory without SMB",
			tcp:  []int{53, 88, 389, 636},
			want: "windows-dc",
		},
		{
			name: "dns server (tcp)",
			tcp:  []int{53, 22},
			want: "dns-server",
		},
		{
			name: "dns server (udp)",
			udp:  []int{53},
			want: "dns-server",
		},
		{
			name: "dns with kerberos but no ldap is not labelled",
			tcp:  []int{53, 88},
			want: "",
		},
		{
			name: "mqtt broker",
			tcp:  []int{1883, 22},
			want: "iot-broker",
		},
		{
			name:   "raspberry pi by vendor + ssh",
			vendor: "Raspberry Pi Foundation",
			tcp:    []int{22},
			want:   "embedded",
		},
		{
			name: "linux host (openssh banner + ssh only)",
			osfp: "SSH-2.0-OpenSSH_9.6p1 Ubuntu-3ubuntu13.5",
			tcp:  []int{22},
			want: "linux-host",
		},
		{
			name: "windows host (OpenSSH_for_Windows banner)",
			osfp: "SSH-2.0-OpenSSH_for_Windows_8.1",
			tcp:  []int{22},
			want: "windows-host",
		},
		{
			name: "appliance (https only, no shell port)",
			tcp:  []int{443},
			want: "appliance",
		},
		{
			name: "appliance (http+https only)",
			tcp:  []int{80, 443},
			want: "appliance",
		},
		{
			name: "ssh-only no banner falls through to linux-host",
			tcp:  []int{22},
			want: "linux-host",
		},
		{
			name: "no match returns empty",
			tcp:  []int{4242},
			want: "",
		},
		{
			name: "empty everything returns empty",
			want: "",
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			got := classify(c.vendor, c.osfp, c.tcp, c.udp)
			if got != c.want {
				t.Errorf("classify(vendor=%q, osfp=%q, tcp=%v, udp=%v) = %q; want %q",
					c.vendor, c.osfp, c.tcp, c.udp, got, c.want)
			}
		})
	}
}
Loading
Loading