diff --git a/ChangeLog.md b/ChangeLog.md index cbd839b..eb826dd 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -17,6 +17,65 @@ _No unreleased changes._ --- +## 26.12 — 2026-05-27 + +Service / application discovery (P2-01). Turns "port 22 is open" into +"SSH-2.0-OpenSSH_9.6p1 Ubuntu", and similar for eight more protocols. +Banners now flow into `Port.Service` for every persisted port, in addition +to the existing `Host.OSFingerprint` for the liveness winner. + +### Added + +- **`internal/scanner/banner.go`** — three new banner-grab strategies: + - `lineBanner` for protocols where the server greets first (SMTP + 25/465/587, FTP 21, POP3 110, IMAP 143, Telnet 23). Bounded + `capReader` defends against peers that flood without an EOL. + - `tlsHTTPSFingerprint` for HTTPS (443/8443). Completes a TLS + handshake (InsecureSkipVerify — we're scraping for ID, not + trusting), peeks at the peer cert CN/SAN, then reuses the same + connection for a HEAD to capture the Server header. Two IDs for + the cost of one dial. + - `mysqlGreeting` for MySQL/MariaDB/Percona (3306). Reads the v10 + handshake packet and extracts the server-version string. Passive + — we never write to the socket. +- **HTTP port list expanded** to include 8000 and 8888 (common + developer-server defaults). +- **`Port.Service` populated by the scanner** for every TCP port + upserted by the liveness, deep-probe, and HTTPS-fingerprint paths. + The column has existed in the schema since the initial migration + but was never written until now. +- **`internal/scanner/banner_test.go`** — tests for every new + banner strategy: SMTP greeting parse, FTP greeting parse, silent-server + timeout (must return ""), valid v10 MySQL handshake parse, wrong + protover MySQL guard, HTTPS handshake with cert CN extraction + + Server header pickup via a `httptest.NewUnstartedServer`. 
+ +### Changed + +- **`scanner.upsertPort` gained a `service string` parameter.** The + three existing call sites (liveness, deepScan, udpScan) pass the + result of `fingerprint()` for TCP and `""` for UDP. UDP banner + probes are protocol-specific and out of scope for this sprint. +- **`fingerprint()` dispatch table grew** from 4 ports to 15 — see + the `switch` in `fingerprint()` for the full list. + +### Notes + +- The liveness path no longer redials for its banner: `host.OSFingerprint` + was already populated by `fingerprint()` before the port upsert, so + the same string is reused as the liveness-port `Service`. +- The deepScan path *does* redial inside `fingerprint()` per open + port. The first dial in `deepScan` was a connect-and-close to + confirm liveness; protocols where the server speaks first need a + fresh socket so the read deadline starts cleanly. The cost is one + extra dial per deep-open port per cycle — well within the global + worker semaphore. +- UDP services (DNS 53, SNMP 161, NTP 123, …) are not banner-grabbed + yet. Each requires a protocol-specific request packet rather than a + passive listen; deferred to a future sprint if asked. + +--- + ## 26.11 — 2026-05-27 Device-type classifier (P2-03 from the operator-feedback queue). Populates diff --git a/internal/scanner/banner.go b/internal/scanner/banner.go new file mode 100644 index 0000000..872a78e --- /dev/null +++ b/internal/scanner/banner.go @@ -0,0 +1,176 @@ +package scanner + +import ( + "bufio" + "context" + "crypto/tls" + "fmt" + "net" + "net/http" + "strconv" + "strings" + "time" +) + +// maxBannerBytes caps how much we read from each service. The longest +// realistic line banner is an Apache 2.4 Server header at ~120 bytes; +// mysqlGreeting does not use this cap, it reads into its own 256-byte buffer. +const maxBannerBytes = 512 + +// lineBanner reads up to the first LF or maxBannerBytes from a service +// that greets the client without prompting (SMTP, FTP, POP3, IMAP, Telnet). 
+// The label prefix ("SMTP: ", "FTP: ", …) keeps the stored string +// self-describing — operators inspecting Port.Service shouldn't have to +// look up the port number to know which protocol the banner came from. +func lineBanner(ctx context.Context, ip string, port int, timeout time.Duration, label string) string { + d := net.Dialer{Timeout: timeout} + conn, err := d.DialContext(ctx, "tcp", net.JoinHostPort(ip, strconv.Itoa(port))) + if err != nil { + return "" + } + defer func() { _ = conn.Close() }() + + _ = conn.SetReadDeadline(time.Now().Add(timeout)) + line, err := bufio.NewReader(&capReader{r: conn, n: maxBannerBytes}).ReadString('\n') + if err != nil && line == "" { + return "" + } + line = strings.TrimRight(line, "\r\n") + if line == "" { + return "" + } + return label + ": " + line +} + +// tlsHTTPSFingerprint completes a TLS handshake to harvest the peer +// certificate's CN/SAN (which often identifies the appliance vendor — +// e.g. "*.unifi.example.com" → Ubiquiti), then sends a HEAD over the +// already-established connection to capture the Server header. Two pieces +// of identification for the cost of one connection. +// +// InsecureSkipVerify is intentional: self-signed and expired certs are the +// rule, not the exception, on internal inventory targets. We're not +// trusting the cert for security — we're scraping it for identification. 
func tlsHTTPSFingerprint(ctx context.Context, ip string, port int, timeout time.Duration) string {
	hostPort := net.JoinHostPort(ip, strconv.Itoa(port))

	tlsDialer := tls.Dialer{
		NetDialer: &net.Dialer{Timeout: timeout},
		Config: &tls.Config{
			InsecureSkipVerify: true, //nolint:gosec // intentional, see comment
			MinVersion:         tls.VersionTLS10,
		},
	}
	conn, err := tlsDialer.DialContext(ctx, "tcp", hostPort)
	if err != nil {
		return ""
	}
	defer func() { _ = conn.Close() }()

	// First identifier: the leaf certificate presented in the handshake.
	certInfo := peerCertIdentity(conn)

	// Second identifier: a HEAD sent over the same TLS connection, so no
	// second dial is spent.
	_ = conn.SetDeadline(time.Now().Add(timeout))
	serverInfo := headServerHeader(ctx, conn, hostPort)

	switch {
	case certInfo == "" && serverInfo == "":
		return ""
	case certInfo == "":
		return "HTTPS: " + serverInfo
	case serverInfo == "":
		return "HTTPS: " + certInfo
	default:
		return "HTTPS: " + certInfo + " " + serverInfo
	}
}

// peerCertIdentity returns "CN=<name>" for the leaf certificate of a TLS
// connection, falling back to "SAN=<first DNS name>" when the CN is empty.
// It returns "" when conn is not a *tls.Conn or no usable name is present.
func peerCertIdentity(conn net.Conn) string {
	tlsConn, ok := conn.(*tls.Conn)
	if !ok {
		return ""
	}
	chain := tlsConn.ConnectionState().PeerCertificates
	if len(chain) == 0 {
		return ""
	}
	leaf := chain[0]
	if cn := leaf.Subject.CommonName; cn != "" {
		return "CN=" + cn
	}
	if len(leaf.DNSNames) > 0 {
		return "SAN=" + leaf.DNSNames[0]
	}
	return ""
}

// headServerHeader writes a HEAD request for "/" to conn and returns
// "Server=<value>" from the response, or "" if the request cannot be
// built or sent, the response cannot be read, or the header is absent.
func headServerHeader(ctx context.Context, conn net.Conn, hostPort string) string {
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, "https://"+hostPort+"/", nil)
	if err != nil {
		return ""
	}
	if err := req.Write(conn); err != nil {
		return ""
	}
	resp, err := http.ReadResponse(bufio.NewReader(conn), req)
	if err != nil {
		return ""
	}
	_ = resp.Body.Close()
	if server := resp.Header.Get("Server"); server != "" {
		return "Server=" + server
	}
	return ""
}

// mysqlGreeting reads MySQL's initial handshake packet.
// The protocol is
// docs.oracle.com/cd/E17952_01/mysql-8.0-en/connection-phase-packets-
// protocol-handshake-v10.html — for our purposes:
//
//	bytes 0..2 : packet length (LE)
//	byte 3     : packet number (always 0 for greeting)
//	byte 4     : protocol version (almost always 10)
//	bytes 5..  : null-terminated server version string
//
// We don't need to parse past the version string. The handshake is the
// server's first packet — we never send anything, so this is passive.
//
// TCP is a byte stream, so the greeting may arrive in several segments.
// Reads are accumulated until the version string's NUL terminator is seen,
// 256 bytes have been received, or the deadline (timeout, or ctx's
// deadline when sooner) expires. If reading stops before a terminator
// arrives, the bytes received so far are used as the version.
//
// Returns "MySQL: <version>" on success and "" on dial failure, on fewer
// than 6 received bytes, on a protocol-version byte other than 10, or on
// an empty version string.
func mysqlGreeting(ctx context.Context, ip string, port int, timeout time.Duration) string {
	const (
		protoVersionIdx = 4   // offset of the protocol-version byte
		versionStart    = 5   // offset of the server-version string
		handshakeV10    = 10  // the only protocol version accepted
		maxGreeting     = 256 // upper bound on bytes read from the peer
	)

	d := net.Dialer{Timeout: timeout}
	conn, err := d.DialContext(ctx, "tcp", net.JoinHostPort(ip, strconv.Itoa(port)))
	if err != nil {
		return ""
	}
	defer func() { _ = conn.Close() }()

	deadline := time.Now().Add(timeout)
	if ctxDeadline, ok := ctx.Deadline(); ok && ctxDeadline.Before(deadline) {
		deadline = ctxDeadline
	}
	_ = conn.SetReadDeadline(deadline)

	buf := make([]byte, maxGreeting)
	filled := 0             // bytes received so far
	scanned := versionStart // next offset to inspect for the terminator
	terminated := false
	for filled < len(buf) && !terminated {
		n, readErr := conn.Read(buf[filled:])
		filled += n

		// Bail out as soon as the protocol byte is known to be wrong
		// rather than waiting out the deadline on a non-MySQL peer.
		if filled > protoVersionIdx && buf[protoVersionIdx] != handshakeV10 {
			return ""
		}
		for ; scanned < filled; scanned++ {
			if buf[scanned] == 0 {
				terminated = true
				break
			}
		}
		if readErr != nil {
			// EOF or deadline: fall through and use what arrived.
			break
		}
	}

	if filled <= versionStart {
		return ""
	}
	// scanned is the terminator's offset when one was found, otherwise
	// the end of the received data.
	if scanned == versionStart {
		return ""
	}
	return "MySQL: " + string(buf[versionStart:scanned])
}

// capReader wraps an io.Reader with a hard byte cap, defended against a
// peer that sends data without an end-of-line for longer than we'd want
// to wait. Used by lineBanner.
+type capReader struct { + r interface { + Read([]byte) (int, error) + } + n int +} + +func (c *capReader) Read(p []byte) (int, error) { + if c.n <= 0 { + return 0, fmt.Errorf("banner exceeded %d bytes", maxBannerBytes) + } + if len(p) > c.n { + p = p[:c.n] + } + n, err := c.r.Read(p) + c.n -= n + return n, err +} diff --git a/internal/scanner/banner_test.go b/internal/scanner/banner_test.go new file mode 100644 index 0000000..9cc755a --- /dev/null +++ b/internal/scanner/banner_test.go @@ -0,0 +1,234 @@ +package scanner + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/base64" + "encoding/binary" + "math/big" + "net" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func TestLineBanner_SMTP(t *testing.T) { + greeting := "220 mx.example.com ESMTP Postfix (Debian)\r\n" + addr := startLineGreeting(t, greeting) + host, portStr, err := net.SplitHostPort(addr) + if err != nil { + t.Fatal(err) + } + port := atoi(t, portStr) + + got := lineBanner(context.Background(), host, port, 500*time.Millisecond, "SMTP") + if got != "SMTP: 220 mx.example.com ESMTP Postfix (Debian)" { + t.Errorf("got %q", got) + } +} + +func TestLineBanner_FTP(t *testing.T) { + addr := startLineGreeting(t, "220 (vsFTPd 3.0.3)\r\n") + host, portStr, _ := net.SplitHostPort(addr) + got := lineBanner(context.Background(), host, atoi(t, portStr), 500*time.Millisecond, "FTP") + if got != "FTP: 220 (vsFTPd 3.0.3)" { + t.Errorf("got %q", got) + } +} + +func TestLineBanner_EmptyServerReturnsEmpty(t *testing.T) { + // A server that accepts but never writes — lineBanner should time + // out and return "" rather than hang the scan loop. 
+ ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + defer ln.Close() + go func() { + for { + c, err := ln.Accept() + if err != nil { + return + } + _ = c // hold open without writing + } + }() + + _, portStr, _ := net.SplitHostPort(ln.Addr().String()) + got := lineBanner(context.Background(), "127.0.0.1", atoi(t, portStr), 100*time.Millisecond, "SMTP") + if got != "" { + t.Errorf("expected empty banner on silent server, got %q", got) + } +} + +func TestMySQLGreeting(t *testing.T) { + // Hand-build a realistic v10 handshake: payload = [protover, version-string, NUL, ...] + // We don't need the full handshake — just enough for the parser. + version := "8.0.35-MySQL Community Server - GPL" + payload := append([]byte{10}, version...) + payload = append(payload, 0) // null terminator + + // 4-byte packet header: 3 bytes LE length + 1 byte seq number + pkt := make([]byte, 4+len(payload)) + binary.LittleEndian.PutUint32(pkt, uint32(len(payload))) + pkt[3] = 0 // seq + copy(pkt[4:], payload) + + addr := startRawWrite(t, pkt) + host, portStr, _ := net.SplitHostPort(addr) + got := mysqlGreeting(context.Background(), host, atoi(t, portStr), 500*time.Millisecond) + if got != "MySQL: 8.0.35-MySQL Community Server - GPL" { + t.Errorf("got %q", got) + } +} + +func TestMySQLGreeting_WrongProtocolVersion(t *testing.T) { + // Same shape but protocol version != 10 → must return "". + addr := startRawWrite(t, append([]byte{4, 0, 0, 0, 9}, "x"...)) + host, portStr, _ := net.SplitHostPort(addr) + got := mysqlGreeting(context.Background(), host, atoi(t, portStr), 500*time.Millisecond) + if got != "" { + t.Errorf("expected empty (wrong protover) got %q", got) + } +} + +func TestTLSHTTPSFingerprint(t *testing.T) { + // Self-signed cert with a recognisable CN so we can assert the CN + // makes it into the result string. 
+ cert, key := selfSignedCert(t, "router.example.com") + tlsCert, err := tls.X509KeyPair(cert, key) + if err != nil { + t.Fatal(err) + } + + srv := httptest.NewUnstartedServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Server", "TestAppliance/1.0") + w.WriteHeader(http.StatusOK) + })) + srv.TLS = &tls.Config{ + Certificates: []tls.Certificate{tlsCert}, + MinVersion: tls.VersionTLS12, + } + srv.StartTLS() + defer srv.Close() + + host, portStr, _ := net.SplitHostPort(strings.TrimPrefix(srv.URL, "https://")) + got := tlsHTTPSFingerprint(context.Background(), host, atoi(t, portStr), 2*time.Second) + + if !strings.Contains(got, "HTTPS:") { + t.Errorf("expected HTTPS: prefix, got %q", got) + } + if !strings.Contains(got, "CN=router.example.com") { + t.Errorf("expected CN in result, got %q", got) + } + if !strings.Contains(got, "Server=TestAppliance/1.0") { + t.Errorf("expected Server header in result, got %q", got) + } +} + +// --- helpers --- + +func startLineGreeting(t *testing.T, line string) (addr string) { + t.Helper() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = ln.Close() }) + go func() { + for { + c, err := ln.Accept() + if err != nil { + return + } + _, _ = c.Write([]byte(line)) + _ = c.Close() + } + }() + return ln.Addr().String() +} + +func startRawWrite(t *testing.T, blob []byte) (addr string) { + t.Helper() + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = ln.Close() }) + go func() { + for { + c, err := ln.Accept() + if err != nil { + return + } + _, _ = c.Write(blob) + _ = c.Close() + } + }() + return ln.Addr().String() +} + +func atoi(t *testing.T, s string) int { + t.Helper() + var v int + for _, r := range s { + v = v*10 + int(r-'0') + } + return v +} + +func selfSignedCert(t *testing.T, cn string) (certPEM, keyPEM []byte) { + t.Helper() + priv, err := ecdsa.GenerateKey(elliptic.P256(), 
rand.Reader) + if err != nil { + t.Fatal(err) + } + tmpl := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: cn}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + IsCA: false, + BasicConstraintsValid: true, + IPAddresses: []net.IP{net.ParseIP("127.0.0.1")}, + DNSNames: []string{cn, "localhost"}, + } + der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &priv.PublicKey, priv) + if err != nil { + t.Fatal(err) + } + keyDER, err := x509.MarshalPKCS8PrivateKey(priv) + if err != nil { + t.Fatal(err) + } + certPEM = pemBlock("CERTIFICATE", der) + keyPEM = pemBlock("PRIVATE KEY", keyDER) + return +} + +func pemBlock(typ string, der []byte) []byte { + // Tiny inline PEM encoder so we don't add a new test-time dep. + const lineLen = 64 + b64 := base64Encode(der) + out := "-----BEGIN " + typ + "-----\n" + for i := 0; i < len(b64); i += lineLen { + end := i + lineLen + if end > len(b64) { + end = len(b64) + } + out += b64[i:end] + "\n" + } + out += "-----END " + typ + "-----\n" + return []byte(out) +} + +func base64Encode(b []byte) string { return base64.StdEncoding.EncodeToString(b) } diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index c1fa006..6e34211 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -193,7 +193,11 @@ func (s *Scanner) Scan(ctx context.Context, subnet string) (int, error) { var openUDP []int if s.ports != nil { - s.upsertPort(ctx, hostID, addr, openPort, models.TCP, models.StateOpen, startedAt) + // Reuse the host-level fingerprint string when the + // liveness port matches — it's the same banner. + // Saves a redial. 
+ livenessService := host.OSFingerprint + s.upsertPort(ctx, hostID, addr, openPort, models.TCP, models.StateOpen, livenessService, startedAt) } if s.deepProbe && s.ports != nil { openTCP = append(openTCP, s.deepScan(ctx, hostID, addr, openPort, startedAt)...) @@ -271,13 +275,14 @@ func (s *Scanner) probe(ctx context.Context, ip string) (int, bool) { } // upsertPort writes one row to the ports store and increments the metrics. -// Pulled out of the hot loop to keep the per-host goroutine readable now -// that we have three port-producing stages (liveness, deep TCP, UDP). -func (s *Scanner) upsertPort(ctx context.Context, hostID int64, ip string, port int, proto models.Protocol, state models.PortState, ts time.Time) { +// service is the protocol-specific banner from fingerprint() — empty when +// no probe applies or the probe failed; the schema allows it. +func (s *Scanner) upsertPort(ctx context.Context, hostID int64, ip string, port int, proto models.Protocol, state models.PortState, service string, ts time.Time) { if err := s.ports.Upsert(ctx, &models.Port{ HostID: hostID, Number: port, Protocol: proto, + Service: service, State: state, FirstSeen: ts, LastSeen: ts, @@ -320,7 +325,13 @@ func (s *Scanner) deepScan(ctx context.Context, hostID int64, ip string, knownOp return } _ = conn.Close() - s.upsertPort(ctx, hostID, ip, port, models.TCP, models.StateOpen, ts) + // Probe again for the protocol-specific banner. Open + // dial confirmed liveness, but most protocols expect + // the server (or client) to speak first — easier to + // just redial inside fingerprint() than juggle a half- + // initialised socket here. + service := fingerprint(ctx, ip, port, s.timeout) + s.upsertPort(ctx, hostID, ip, port, models.TCP, models.StateOpen, service, ts) mu.Lock() out = append(out, port) mu.Unlock() @@ -359,7 +370,10 @@ func (s *Scanner) udpScan(ctx context.Context, hostID int64, ip string, ts time. 
if !ok { return } - s.upsertPort(ctx, hostID, ip, port, models.UDP, state, ts) + // UDP banner-grabs are protocol-specific (DNS query + // for 53, SNMP get for 161, …). Out of scope here; + // Service stays empty for UDP for now. + s.upsertPort(ctx, hostID, ip, port, models.UDP, state, "", ts) if state == models.StateOpen { metrics.UDPProbeSuccessTotal.Inc() mu.Lock() @@ -415,21 +429,39 @@ func reverseDNS(ctx context.Context, ip string) string { return strings.TrimSuffix(names[0], ".") } -// fingerprint is a best-effort banner grab for the port that answered the -// liveness probe. For SSH (22) we read the first line of the protocol -// greeting; for HTTP (80, 8080) we send a minimal HEAD request and capture -// the Server header. Anything else, or any error, returns "" so the field -// stays absent rather than misleadingly populated. +// fingerprint dispatches to a port-specific banner-grab helper. Returns "" +// for unknown ports, dial failures, protocol-decode errors, or any other +// uncertainty — the inventory shows the field blank rather than mislabel. +// +// The result is used for two storage paths: +// - Host.OSFingerprint, for the liveness-winner port only (preserves +// pre-26.12 semantics; the classifier uses this string to refine +// Linux/Windows decisions). +// - Port.Service, for every port the scanner persists. +// +// One function, two storage targets — a banner for port 22 is both an OS +// hint and a service identification, so duplicating logic to separate the +// two concerns would just produce drift. 
func fingerprint(ctx context.Context, ip string, port int, timeout time.Duration) string { switch port { case 22: return sshBanner(ctx, ip, port, timeout) - case 80, 8080: + case 21: + return lineBanner(ctx, ip, port, timeout, "FTP") + case 23: + return lineBanner(ctx, ip, port, timeout, "Telnet") + case 25, 465, 587: + return lineBanner(ctx, ip, port, timeout, "SMTP") + case 110: + return lineBanner(ctx, ip, port, timeout, "POP3") + case 143: + return lineBanner(ctx, ip, port, timeout, "IMAP") + case 80, 8080, 8000, 8888: return httpServerHeader(ctx, ip, port, timeout, "http") - case 443: - // TLS handshake would be needed for 443; skip rather than dial - // twice. A future deep-probe pass can do this properly. - return "" + case 443, 8443: + return tlsHTTPSFingerprint(ctx, ip, port, timeout) + case 3306: + return mysqlGreeting(ctx, ip, port, timeout) default: return "" }