diff --git a/README.md b/README.md index 223861c..e70d08b 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ As of June 2017 we have a first working version and we are trying to get the con Petze is configured through a set of yaml configuration files. The path to folder containing the configuration is passed to petze as the first argument on the commandline. ```bash -$ petze path/to/petzconf +$ petze path/to/confdir ``` Take a look at a simple example config: https://github.com/foomo/petze/tree/master/exampleConfig @@ -35,6 +35,18 @@ Take a look at a simple example config: https://github.com/foomo/petze/tree/mast The configuration file for petze is called: **petze.yml**. It provides information for the petze service, as well the configuration for your notifications. +The hierarchy of the config folder should look as follows: +``` +exampleConfigs/ + petze.yml + services/ + spiegel.yml + google.yml + bestbytes.yml + hosts/ + example.yml +``` + ## Main config file petze.yml ```yaml @@ -76,7 +88,7 @@ sms: ## Service configuration files -Any other files with a .yml suffix will be treated as service configurations. +Files in the services subdirectory with a .yml suffix will be treated as service configuration files. It is strongly encouraged to organize them in folder structures. These will be reflected in the service ids. @@ -134,6 +146,27 @@ session: - matchReply: "asdf" ``` + +## Host Configuration files + +Files in the hosts subdirectory with a .yml suffix will be treated as host configuration files. + +```yaml +# host name to check +hostname: example.hostname.com + +# interval of the ICMP echo request +interval: 10s + +# ICMP echo request timeout +timeout: 5s + +# services that are running on this host +services: + - spiegel + - google +``` + ## SMTP Integration You can now get notifications by Mail, all you need to provide is an SMTP server! diff --git a/collector/collector.go b/collector/collector.go index b143101..2229f88 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -10,27 +10,40 @@ import ( log "github.com/sirupsen/logrus" ) -type ResultListener func(watch.Result) +type ServiceResultListener func(watch.ServiceResult) +type HostResultListener func(watch.HostResult) -// Collector collects stats on services +// Collector collects stats on hosts & services type Collector struct { - servicesConfigDir string - chanServices chan map[string]*config.Service - chanGetResults chan map[string][]watch.Result - watchers map[string]*watch.Watcher - resultListeners []ResultListener - services map[string]*config.Service + servicesConfigDir string + hostsConfigDir string + chanServices chan map[string]*config.Service + chanHosts chan map[string]*config.Host + chanGetServiceResults chan map[string][]watch.ServiceResult + chanGetHostResults chan map[string][]watch.HostResult + serviceWatchers map[string]*watch.ServiceWatcher + hostWatchers map[string]*watch.HostWatcher + serviceResultListeners []ServiceResultListener + hostResultListeners []HostResultListener + services map[string]*config.Service + hosts map[string]*config.Host } // NewCollector construct a collector - it will watch its config files for changes -func NewCollector(servicesConfigDir string) (c *Collector, err error) { +func NewCollector(servicesConfigDir string, hostsConfigDir string) (c *Collector, err error) { c = &Collector{ - servicesConfigDir: servicesConfigDir, - services: make(map[string]*config.Service), - chanServices: make(chan map[string]*config.Service), - chanGetResults: make(chan map[string][]watch.Result), - watchers: make(map[string]*watch.Watcher), - resultListeners: make([]ResultListener, 0), + servicesConfigDir: servicesConfigDir, + hostsConfigDir: hostsConfigDir, + services: make(map[string]*config.Service), + hosts: make(map[string]*config.Host), + chanServices: make(chan map[string]*config.Service), + chanHosts: make(chan map[string]*config.Host), + chanGetServiceResults: make(chan map[string][]watch.ServiceResult), + chanGetHostResults: make(chan map[string][]watch.HostResult), + serviceWatchers: make(map[string]*watch.ServiceWatcher), + hostWatchers: make(map[string]*watch.HostWatcher), + serviceResultListeners: make([]ServiceResultListener, 0), + hostResultListeners: make([]HostResultListener, 0), } return c, nil @@ -42,95 +55,168 @@ func (c *Collector) Start() { go c.configWatch() } -const maxResults = 1000 +const maxServiceResults = 1000 +const maxHostResults = 1000 -func (c *Collector) RegisterListener(listener ResultListener) { - c.resultListeners = append(c.resultListeners, listener) +func (c *Collector) RegisterServiceListener(listener ServiceResultListener) { + c.serviceResultListeners = append(c.serviceResultListeners, listener) } -func (c *Collector) NotifyListeners(result watch.Result) { - for _, listener := range c.resultListeners { +func (c *Collector) RegisterHostListener(listener HostResultListener) { + c.hostResultListeners = append(c.hostResultListeners, listener) +} + +func (c *Collector) NotifyServiceListeners(result watch.ServiceResult) { + for _, listener := range c.serviceResultListeners { + listener(result) + } +} + +func (c *Collector) NotifyHostListeners(result watch.HostResult) { + for _, listener := range c.hostResultListeners { listener(result) } } func (c *Collector) collect() { - chanResult := make(chan watch.Result) - results := map[string][]watch.Result{} + chanServiceResult := make(chan watch.ServiceResult) + chanHostResult := make(chan watch.HostResult) + serviceResults := map[string][]watch.ServiceResult{} + hostResults := map[string][]watch.HostResult{} for { select { - case <-c.chanGetResults: - resultsCopy := map[string][]watch.Result{} - for name, results := range results { - resultsCopy[name] = results + case <-c.chanGetServiceResults: + serviceResultsCopy := map[string][]watch.ServiceResult{} + for name, serviceResults := range serviceResults { + serviceResultsCopy[name] = serviceResults + } + c.chanGetServiceResults <- serviceResultsCopy + + case <-c.chanGetHostResults: + hostResultsCopy := map[string][]watch.HostResult{} + for name, hostResults := range hostResults { + hostResultsCopy[name] = hostResults } - c.chanGetResults <- resultsCopy + c.chanGetHostResults <- hostResultsCopy case newServices := <-c.chanServices: c.services = newServices var lastErrors = make(map[string][]watch.Error) // stop old watchers - for oldWatcherID, oldWatcher := range c.watchers { - oldWatcher.Stop() + for oldWatcherID, oldWatcher := range c.serviceWatchers { + oldWatcher.Watcher.Stop() // if the service had errors before updating the config // store them in a map so we can transfer them to the updated watchers - if len(oldWatcher.LastErrors()) > 0 { - lastErrors[oldWatcherID] = oldWatcher.LastErrors() + if len(oldWatcher.Watcher.LastErrors()) > 0 { + lastErrors[oldWatcherID] = oldWatcher.Watcher.LastErrors() } - delete(c.watchers, oldWatcherID) + delete(c.serviceWatchers, oldWatcherID) } - // setup new watchers for serviceID, service := range c.services { // check if the service had errors before being updated lastErrs, ok := lastErrors[serviceID] if ok { // transfer errors to new watcher - newWatcher := watch.Watch(service, chanResult) + newWatcher := watch.WatchService(service, chanServiceResult, chanHostResult, c.hosts) + newWatcher.SetLastErrors(lastErrs) + c.serviceWatchers[serviceID] = newWatcher + } else { + // no errors - init a new watcher + // TODO: when starting up c.hosts is empty... + c.serviceWatchers[serviceID] = watch.WatchService(service, chanServiceResult, chanHostResult, c.hosts) + } + // reset stored results + _, ok = serviceResults[serviceID] + if !ok { + serviceResults[serviceID] = []watch.ServiceResult{} + } + } + + case newHosts := <-c.chanHosts: + c.hosts = newHosts + + var lastErrors = make(map[string][]watch.Error) + + // stop old watchers + for oldWatcherID, oldWatcher := range c.hostWatchers { + oldWatcher.Watcher.Stop() + + // if the host had errors before updating the config + // store them in a map so we can transfer them to the updated watchers + if len(oldWatcher.Watcher.LastErrors()) > 0 { + lastErrors[oldWatcherID] = oldWatcher.Watcher.LastErrors() + } + + delete(c.hostWatchers, oldWatcherID) + } + + // setup new watchers + for hostID, host := range c.hosts { + // check if the host had errors before being updated + lastErrs, ok := lastErrors[hostID] + if ok { + // transfer errors to new watcher + newWatcher := watch.WatchHost(host, chanHostResult) newWatcher.SetLastErrors(lastErrs) - c.watchers[serviceID] = newWatcher + c.hostWatchers[hostID] = newWatcher } else { // no errors - init a new watcher - c.watchers[serviceID] = watch.Watch(service, chanResult) + c.hostWatchers[hostID] = watch.WatchHost(host, chanHostResult) } // reset stored results - _, ok = results[serviceID] + _, ok = hostResults[hostID] if !ok { - results[serviceID] = []watch.Result{} + hostResults[hostID] = []watch.HostResult{} } } // clean up results - for possiblyUnknownServiceID := range results { - _, ok := c.watchers[possiblyUnknownServiceID] + for possiblyUnknownHostID := range hostResults { + _, ok := c.hostWatchers[possiblyUnknownHostID] if !ok { // clean up results - delete(results, possiblyUnknownServiceID) + delete(hostResults, possiblyUnknownHostID) } } - case result := <-chanResult: - serviceResults, ok := results[result.ID] + case serviceResult := <-chanServiceResult: + serviceResultsFromChan, ok := serviceResults[serviceResult.ID] if ok { - serviceResults = append(serviceResults, result) - if len(serviceResults) > maxResults { - serviceResults = serviceResults[len(serviceResults)-maxResults:] + serviceResultsFromChan = append(serviceResultsFromChan, serviceResult) + if len(serviceResultsFromChan) > maxServiceResults { + serviceResultsFromChan = serviceResultsFromChan[len(serviceResultsFromChan)-maxServiceResults:] } - results[result.ID] = serviceResults + serviceResults[serviceResult.ID] = serviceResultsFromChan - c.NotifyListeners(result) + c.NotifyServiceListeners(serviceResult) + } + case hostResult := <-chanHostResult: + hostResultsFromChan, ok := hostResults[hostResult.ID] + if ok { + hostResultsFromChan = append(hostResultsFromChan, hostResult) + if len(hostResultsFromChan) > maxHostResults { + hostResultsFromChan = hostResultsFromChan[len(hostResultsFromChan)-maxHostResults:] + } + hostResults[hostResult.ID] = hostResultsFromChan + + c.NotifyHostListeners(hostResult) } } } } -// GetResults get current results -func (c *Collector) GetResults() map[string][]watch.Result { - c.chanGetResults <- nil - return <-c.chanGetResults +func (c *Collector) GetServiceResults() map[string][]watch.ServiceResult { + c.chanGetServiceResults <- nil + return <-c.chanGetServiceResults +} + +func (c *Collector) GetHostResults() map[string][]watch.HostResult { + c.chanGetHostResults <- nil + return <-c.chanGetHostResults } func hashServiceConfig(config map[string]*config.Service) (hash string) { @@ -142,20 +228,53 @@ func hashServiceConfig(config map[string]*config.Service) (hash string) { return hash } +func hashHostConfig(config map[string]*config.Host) (hash string) { + hash = "invalid config" + jsonBytes, errJSON := json.Marshal(config) + if errJSON == nil { + hash = string(jsonBytes) + } + return hash +} + func (c *Collector) configWatch() { for { services, errServices := config.LoadServices(c.servicesConfigDir) if errServices != nil { - log.Error("could not read configuration:", errServices) + log.Error("could not read configuration: ", errServices) } if errServices == nil { newHash := hashServiceConfig(services) oldHash := hashServiceConfig(c.services) if newHash != oldHash { - log.Info("configuration update successful") + log.Info("service configuration update successful") c.updateServices() } } + + hosts, errHosts := config.LoadHosts(c.hostsConfigDir) + + if errHosts != nil { + log.Error("could not read configuration: ", errHosts) + } + if errHosts == nil { + newHash := hashHostConfig(hosts) + oldHash := hashHostConfig(c.hosts) + if newHash != oldHash { + log.Info("host configuration update successful") + c.updateHosts() + } + } + + // check if service defined in host config exists + for _, hostConfig := range hosts { + // validate services + for _, service := range hostConfig.Services { + if _, ok := services[service]; !ok { + log.Fatal("host " + hostConfig.Hostname + " has service " + service + " in its list of services but the service doesn't exist in the service config dir") + } + } + } time.Sleep(10 * time.Second) } } @@ -169,3 +288,13 @@ func (c *Collector) updateServices() error { } return err } + +func (c *Collector) updateHosts() error { + hosts, err := config.LoadHosts(c.hostsConfigDir) + if err == nil { + c.chanHosts <- hosts + } else { + log.Warn("could not update hosts:", err) + } + return err +} diff --git a/collector/collector_test.go b/collector/collector_test.go index 2f821a4..c696f51 100644 --- a/collector/collector_test.go +++ b/collector/collector_test.go @@ -1,19 +1,24 @@ package collector import ( - "github.com/foomo/petze/watch" "testing" + + "github.com/foomo/petze/watch" ) func TestCollectorListeners(t *testing.T) { - var actualResult watch.Result - c, _ := NewCollector("") - c.RegisterListener(func(result watch.Result) { + var actualResult watch.ServiceResult + c, _ := NewCollector("", "") + c.RegisterServiceListener(func(result watch.ServiceResult) { actualResult = result }) - expectedResult := watch.Result{ID: "some-fake-id"} - c.NotifyListeners(expectedResult) + expectedResult := watch.ServiceResult{ + Result: watch.Result{ + ID: "some-fake-id", + }, + } + c.NotifyServiceListeners(expectedResult) if actualResult.ID != expectedResult.ID { t.Error("actual result is not equal to the expected result") diff --git a/config/config.go b/config/config.go index 3a46bbd..4bdd53b 100644 --- a/config/config.go +++ b/config/config.go @@ -58,7 +58,7 @@ type Service struct { Endpoint string `yaml:"endpoint"` Interval time.Duration `yaml:"interval"` - Session []Call `yaml:"session"` + Session []Call `yaml:"session"` // Notifications NotifyIfResolved bool `yaml:"notifyIfResolved"` @@ -67,11 +67,27 @@ type Service struct { TLSWarning time.Duration `yaml:"tlsWarning"` } +// Host is a host to monitor +type Host struct { + + // host identifier + ID string `yaml:"id"` + Hostname string `yaml:"hostname"` + Interval time.Duration `yaml:"interval"` + Timeout time.Duration `yaml:"timeout"` + + // Notifications + NotifyIfResolved bool `yaml:"notifyIfResolved"` + + // List of services running on host + Services []string `yaml:"services"` +} + // Server models the petze.yml main config file type Server struct { // endpoint to expose metrics - Address string `yaml:"address"` + Address string `yaml:"address"` // auth BasicAuthFile string `yaml:"basicAuthFile"` diff --git a/config/load.go b/config/load.go index 79711d6..6eaad41 100644 --- a/config/load.go +++ b/config/load.go @@ -33,6 +33,22 @@ func LoadServices(configDir string) (services map[string]*Service, err error) { return services, nil } +func LoadHosts(configDir string) (hosts map[string]*Host, err error) { + hosts = make(map[string]*Host) + errLoadServices := loadHostsFromDir(configDir, hosts) + if errLoadServices != nil { + err = errors.New("could not load host configurations from config dir : " + configDir + ", : " + errLoadServices.Error()) + return + } + for id, host := range hosts { + host.ID = id + if host.Interval == 0 { + host.Interval = 60 + } + } + return hosts, nil +} + func LoadServer(configDir string) (server *Server, err error) { server = &Server{} return server, load(path.Join(configDir, serverConfigFile), &server) @@ -66,6 +82,26 @@ func loadServicesFromDir(configDir string, targets map[string]*Service) error { }) } +func loadHostsFromDir(configDir string, targets map[string]*Host) error { + absoluteConfigDir, errAbsoluteConfigDir := filepath.Abs(configDir) + if errAbsoluteConfigDir != nil { + return errAbsoluteConfigDir + } + return filepath.Walk(absoluteConfigDir, func(fp string, info os.FileInfo, err error) error { + if !info.IsDir() && !strings.HasPrefix(info.Name(), ".") && strings.HasSuffix(fp, ".yml") && info.Name() != "petze.yml" { + p := strings.TrimSuffix(strings.TrimPrefix(fp, absoluteConfigDir+string(os.PathSeparator)), ".yml") + hostConfig := &Host{} + targets[p] = hostConfig + loadErr := load(fp, &hostConfig) + if loadErr != nil { + return loadErr + } + return nil + } + return nil + }) +} + // Load load config from a file func load(configFile string, target interface{}) error { configBytes, err := ioutil.ReadFile(configFile) diff --git a/exampleConfig/hosts/example.yml b/exampleConfig/hosts/example.yml new file mode 100644 index 0000000..cd9b01f --- /dev/null +++ b/exampleConfig/hosts/example.yml @@ -0,0 +1,6 @@ +hostname: example.hostname.com +interval: 10s +timeout: 5s +services: + - spiegel + - google \ No newline at end of file diff --git a/exampleConfig/bestbytes.yml b/exampleConfig/services/bestbytes.yml similarity index 100% rename from exampleConfig/bestbytes.yml rename to exampleConfig/services/bestbytes.yml diff --git a/exampleConfig/google.yml b/exampleConfig/services/google.yml similarity index 100% rename from exampleConfig/google.yml rename to exampleConfig/services/google.yml diff --git a/exampleConfig/services/petze.yml b/exampleConfig/services/petze.yml new file mode 100644 index 0000000..25b0b6e --- /dev/null +++ b/exampleConfig/services/petze.yml @@ -0,0 +1,15 @@ +# service metrics at this endpoint +address: 127.0.0.1:8080 + +# configure SMTP notifications +smtp: + server: smtp-relay.yourprovider.com + user: you@mail.com + pass: yourpassword + port: 465 + from: replyto@mail.com + to: + - user1tonotify@mail.com + - user2tonotify@mail.com + +slack: https://hooks.slack.com/services/custom-parameters diff --git a/exampleConfig/spiegel.yml b/exampleConfig/services/spiegel.yml similarity index 100% rename from exampleConfig/spiegel.yml rename to exampleConfig/services/spiegel.yml diff --git a/exporter/logexporter.go b/exporter/logexporter.go index c191382..4e05d6b 100644 --- a/exporter/logexporter.go +++ b/exporter/logexporter.go @@ -5,15 +5,15 @@ import ( "github.com/sirupsen/logrus" ) -func LogResultHandler(result watch.Result) { +func LogServiceResultHandler(serviceResult watch.ServiceResult) { logger := logrus.WithFields(logrus.Fields{ - "service_id": result.ID, - "runtime": result.RunTime, - "timeout": result.Timeout, + "service_id": serviceResult.ID, + "runtime": serviceResult.RunTime, + "timeout": serviceResult.Timeout, }) - if len(result.Errors) > 0 { - for _, err := range result.Errors { + if len(serviceResult.Errors) > 0 { + for _, err := range serviceResult.Errors { if err.Comment != "" { logger = logger.WithField("comment", err.Comment) } @@ -25,5 +25,26 @@ func LogResultHandler(result watch.Result) { } else { logger.Info("run completed without errors") } +} + +func LogHostResultHandler(hostResult watch.HostResult) { + logger := logrus.WithFields(logrus.Fields{ + "host_id": hostResult.ID, + "rtt": hostResult.RunTime, + "timeout": hostResult.Timeout, + }) + if len(hostResult.Errors) > 0 { + for _, err := range hostResult.Errors { + if err.Comment != "" { + logger = logger.WithField("comment", err.Comment) + } + logger.WithFields(logrus.Fields{ + "type": err.Type, + "location": err.Location, + }).Error(err.Error) + } + } else { + logger.Info("run completed without errors") + } } diff --git a/exporter/prometheus.go b/exporter/prometheus.go index 85d315f..434b3a6 100644 --- a/exporter/prometheus.go +++ b/exporter/prometheus.go @@ -1,9 +1,10 @@ package exporter import ( + "time" + "github.com/foomo/petze/watch" "github.com/prometheus/client_golang/prometheus" - "time" ) var ( @@ -12,19 +13,36 @@ var ( Help: "Number of services that are throwing errors for their scenarios", }, []string{"service_id"}) + hostErrors = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "petze_host_error_count", + Help: "Number of hosts that are throwing errors for their scenarios", + }, []string{"host_id"}) + serviceResponseTimes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "petze_service_session_execution_time", Help: "Service response times per session execution", }, []string{"service_id"}) + + hostResponseTimes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "petze_host_ping_execution_time", + Help: "Host response times per ICMP echo reply", + }, []string{"host_id"}) ) func init() { // Metrics have to be registered to be exposed: prometheus.MustRegister(serviceErrors) prometheus.MustRegister(serviceResponseTimes) + prometheus.MustRegister(hostErrors) + prometheus.MustRegister(hostResponseTimes) +} + +func PrometheusServiceMetricsListener(serviceResult watch.ServiceResult) { + serviceErrors.WithLabelValues(serviceResult.ID).Set(float64(len(serviceResult.Errors))) + serviceResponseTimes.WithLabelValues(serviceResult.ID).Set(float64(serviceResult.RunTime / time.Millisecond)) } -func PrometheusMetricsListener(result watch.Result) { - serviceErrors.WithLabelValues(result.ID).Set(float64(len(result.Errors))) - serviceResponseTimes.WithLabelValues(result.ID).Set(float64(result.RunTime / time.Millisecond)) +func PrometheusHostMetricsListener(hostResult watch.HostResult) { + hostErrors.WithLabelValues(hostResult.ID).Set(float64(len(hostResult.Errors))) + hostResponseTimes.WithLabelValues(hostResult.ID).Set(float64(hostResult.RunTime / time.Millisecond)) } diff --git a/go.mod b/go.mod index d621699..8da0198 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/abbot/go-http-auth v0.4.0 github.com/andybalholm/cascadia v1.2.0 // indirect github.com/davecgh/go-spew v1.1.1 + github.com/go-ping/ping v0.0.0-20201022122018-3977ed72668a github.com/google/uuid v1.1.1 // indirect github.com/huandu/xstrings v1.3.2 // indirect github.com/imdario/mergo v0.3.9 // indirect @@ -35,7 +36,6 @@ require ( github.com/ttacon/builder v0.0.0-20170518171403-c099f663e1c2 // indirect github.com/ttacon/libphonenumber v1.1.0 // indirect github.com/x-cray/logrus-prefixed-formatter v0.5.2 - golang.org/x/net v0.0.0-20200625001655-4c5254603344 // indirect gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect gopkg.in/gomail.v2 v2.0.0-20160411212932-81ebce5c23df gopkg.in/yaml.v2 v2.3.0 diff --git a/go.sum b/go.sum index 6a5e7ac..a579e25 100644 --- a/go.sum +++ b/go.sum @@ -46,6 +46,8 @@ github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2 github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-ping/ping v0.0.0-20201022122018-3977ed72668a h1:O9xspHB2yrvKfMQ1m6OQhqe37i5yvg0dXAYMuAjugmM= +github.com/go-ping/ping v0.0.0-20201022122018-3977ed72668a/go.mod h1:35JbSyV/BYqHwwRA6Zr1uVDm1637YlNOU61wI797NPI= github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= @@ -206,6 +208,8 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344 h1:vGXIOMxbNfDTk/aXCmfdLgkrSV+Z2tcbze+pEc3v5W4= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200904194848-62affa334b73 h1:MXfv8rhZWmFeqX3GNZRsd6vOLoaCHjYEX3qkRo3YBUA= +golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/mail/mail.go b/mail/mail.go index 925e106..063caf7 100644 --- a/mail/mail.go +++ b/mail/mail.go @@ -116,7 +116,7 @@ func Send(to string, subject string, mail hermes.Email) { // prevent loop if to != m.from { // notify grand master - Send(m.from, "[Mail Error] "+subject+" to "+to, GenerateErrorMail([]error{err}, "failed to send mail", "internal")) + Send(m.from, "[Mail Error] "+subject+" to "+to, GenerateServiceErrorMail([]error{err}, "failed to send mail", "internal")) } } } @@ -156,7 +156,7 @@ func renderMail(mail hermes.Email) (html, plainText string, err error) { return } -func GenerateErrorMail(errs []error, msg string, service string) hermes.Email { +func GenerateServiceErrorMail(errs []error, msg string, service string) hermes.Email { var intros = []string{ "An error with the service " + strings.ToUpper(service) + " occurred:", @@ -182,14 +182,54 @@ func GenerateErrorMail(errs []error, msg string, service string) hermes.Email { } } -func GenerateResolvedNotificationMail(service string) hermes.Email { +func GenerateHostErrorMail(errs []error, msg string, service string) hermes.Email { + + var intros = []string{ + "An error with the host " + strings.ToUpper(service) + " occurred:", + "Timestamp: " + time.Now().Format(timestampFormat), + } + if msg != "" { + intros = append(intros, "Message: "+msg) + } + if len(errs) > 0 { + intros = append(intros, "Errors: ") + for _, e := range errs { + intros = append(intros, e.Error()) + } + } + + return hermes.Email{ + Body: hermes.Body{ + Greeting: "Dear", + Name: "Admin", + Signature: "kind regards", + Intros: intros, + }, + } +} + +func GenerateServiceResolvedNotificationMail(service string) hermes.Email { + return hermes.Email{ + Body: hermes.Body{ + Greeting: "Dear", + Name: "Admin", + Signature: "kind regards", + Intros: []string{ + "Service " + strings.ToUpper(service) + " is back to normal operation", + "Timestamp: " + time.Now().Format(timestampFormat), + }, + }, + } +} + +func GenerateHostResolvedNotificationMail(service string) hermes.Email { return hermes.Email{ Body: hermes.Body{ Greeting: "Dear", Name: "Admin", Signature: "kind regards", Intros: []string{ - "service " + strings.ToUpper(service) + " is back to normal operation", + "Host " + strings.ToUpper(service) + " is back to normal operation", "Timestamp: " + time.Now().Format(timestampFormat), }, }, diff --git a/service/handler.go b/service/handler.go index 5c47aa2..db44db4 100644 --- a/service/handler.go +++ b/service/handler.go @@ -10,15 +10,24 @@ import ( ) type ServiceStatus struct { - ID string `json:"id"` - Results []watch.Result `json:"results"` + ID string `json:"id"` + ServiceResults []watch.ServiceResult `json:"results"` +} + +type HostStatus struct { + ID string `json:"id"` + HostResults []watch.HostResult `json:"results"` } func (s *server) GETServices(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { jsonReply("GETCollectorConfigServices", w) } -func (s *server) GETStatus(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { +func (s *server) GETHosts(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { + jsonReply("GETCollectorConfigHosts", w) +} + +func (s *server) GETServicesStatus(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { limitInt := 1000 limitIntCandidate, err := strconv.Atoi(r.FormValue("limit")) if err == nil { @@ -26,7 +35,7 @@ func (s *server) GETStatus(w http.ResponseWriter, r *http.Request, ps httprouter } status := []ServiceStatus{} - serviceResults := s.collector.GetResults() + serviceResults := s.collector.GetServiceResults() serviceIDs := []string{} for serviceID := range serviceResults { serviceIDs = append(serviceIDs, serviceID) @@ -38,8 +47,35 @@ func (s *server) GETStatus(w http.ResponseWriter, r *http.Request, ps httprouter results = results[len(results)-limitInt:] } status = append(status, ServiceStatus{ - ID: serviceID, - Results: results, + ID: serviceID, + ServiceResults: results, + }) + } + jsonReply(status, w) +} + +func (s *server) GETHostsStatus(w http.ResponseWriter, r *http.Request, ps httprouter.Params) { + limitInt := 1000 + limitIntCandidate, err := strconv.Atoi(r.FormValue("limit")) + if err == nil { + limitInt = limitIntCandidate + } + status := []HostStatus{} + + serviceResults := s.collector.GetHostResults() + hostIDs := []string{} + for hostID := range serviceResults { + hostIDs = append(hostIDs, hostID) + } + sort.Strings(hostIDs) + for _, hostID := range hostIDs { + results := serviceResults[hostID] + if len(results) > limitInt { + results = results[len(results)-limitInt:] + } + status = append(status, HostStatus{ + ID: hostID, + HostResults: results, }) } jsonReply(status, w) diff --git a/service/service.go b/service/service.go index ee1f0f3..669199f 100644 --- a/service/service.go +++ b/service/service.go @@ -31,8 +31,8 @@ type server struct { collector *collector.Collector } -func newServer(servicesConfigfile string) (s *server, err error) { - coll, err := collector.NewCollector(servicesConfigfile) +func newServer(configFile string) (s *server, err error) { + coll, err := collector.NewCollector(configFile+"/services", configFile+"/hosts") defer coll.Start() s = &server{ router: httprouter.New(), @@ -40,7 +40,9 @@ func newServer(servicesConfigfile string) (s *server, err error) { } s.router.GET("/services", s.GETServices) - s.router.GET("/status", s.GETStatus) + s.router.GET("/services/status", s.GETServicesStatus) + s.router.GET("/hosts", s.GETHosts) + s.router.GET("/hosts/status", s.GETHostsStatus) s.router.Handler("GET", "/metrics", promhttp.Handler()) return s, nil @@ -101,15 +103,17 @@ func getTLSConfig() *tls.Config { } // Run as a server -func Run(c *config.Server, servicesConfigfile string) error { +func Run(c *config.Server, configFile string) error { - s, err := newServer(servicesConfigfile) + s, err := newServer(configFile) if err != nil { return err } // register additional listeners which listen to results - s.collector.RegisterListener(exporter.PrometheusMetricsListener) - s.collector.RegisterListener(exporter.LogResultHandler) + s.collector.RegisterServiceListener(exporter.PrometheusServiceMetricsListener) + s.collector.RegisterHostListener(exporter.PrometheusHostMetricsListener) + s.collector.RegisterServiceListener(exporter.LogServiceResultHandler) + s.collector.RegisterHostListener(exporter.LogHostResultHandler) log.Info("starting petze server on: ", c.Address) diff --git a/slack/slack.go b/slack/slack.go index 5d6b1b2..d754383 100644 --- a/slack/slack.go +++ b/slack/slack.go @@ -3,12 +3,13 @@ package slack import ( "bytes" "encoding/json" - "github.com/sirupsen/logrus" - prefixed "github.com/x-cray/logrus-prefixed-formatter" "io/ioutil" "net/http" "strings" "time" + + "github.com/sirupsen/logrus" + prefixed "github.com/x-cray/logrus-prefixed-formatter" ) var ( @@ -77,7 +78,7 @@ func Send(message []byte) { Log.Info("slack bot response body: ", string(responseBody)) } -func GenerateErrorMessage(errs []error, service string) []byte { +func GenerateServiceErrorMessage(errs []error, service string) []byte { var errMessage = []string{ time.Now().Format(timestampFormat), @@ -99,11 +100,49 @@ func GenerateErrorMessage(errs []error, service string) []byte { return marshalledMessage } -func GenerateResolvedNotification(service string) []byte { +func GenerateHostErrorMessage(errs []error, service string) []byte { + + var errMessage = []string{ + time.Now().Format(timestampFormat), + "an error occured for host " + strings.ToUpper(service) + "\n", + } + + if len(errs) > 0 { + for _, e := range errs { + errMessage = append(errMessage, e.Error()) + } + } + + unmarshalledMessage := &Message{Text: strings.Join(errMessage, " ")} + marshalledMessage, err := json.Marshal(unmarshalledMessage) + if err != nil { + Log.Error(err) + } + + return marshalledMessage +} + +func GenerateServiceErrorResolvedNotification(service string) []byte { + + var errMessage = []string{ + time.Now().Format(timestampFormat), + "Service " + strings.ToUpper(service) + " is back to normal operation!", + } + + unmarshalledMessage := &Message{Text: strings.Join(errMessage, " ")} + marshalledMessage, err := json.Marshal(unmarshalledMessage) + if err != nil { + Log.Error(err) + } + + return marshalledMessage +} + +func GenerateHostErrorResolvedNotification(service string) []byte { var errMessage = []string{ time.Now().Format(timestampFormat), - "service " + strings.ToUpper(service) + " is back to normal operation!", + "Host " + strings.ToUpper(service) + " is back to normal operation!", } unmarshalledMessage := &Message{Text: strings.Join(errMessage, " ")} diff --git a/sms/sms.go b/sms/sms.go index 998e540..4d12b9a 100644 --- a/sms/sms.go +++ b/sms/sms.go @@ -16,18 +16,36 @@ func InitSMS(c *config.SMS) { conf = c } -func SendErrors(errs []error, service string) { +func SendServiceErrors(errs []error, service string) { if conf.TwilioSID != "" && conf.TwilioToken != "" { - SendTwilioSMS(GenerateTwilioErrorSMS(errs, service)) + SendTwilioSMS(GenerateTwilioServiceErrorSMS(errs, service)) } if conf.SendInBlueAPIKey != "" { SendSIB(GenerateSIBErrorSMS(errs, service)) } } -func SendResolvedNotification(service string) { +func SendHostErrors(errs []error, service string) { if conf.TwilioSID != "" && conf.TwilioToken != "" { - SendTwilioSMS(GenerateTwilioResolvedSMS(service)) + SendTwilioSMS(GenerateTwilioHostErrorSMS(errs, service)) + } + if conf.SendInBlueAPIKey != "" { + SendSIB(GenerateSIBErrorSMS(errs, service)) + } +} + +func SendServiceErrorResolvedNotification(service string) { + if conf.TwilioSID != "" && conf.TwilioToken != "" { + SendTwilioSMS(GenerateTwilioServiceErrorResolvedSMS(service)) + } + if conf.SendInBlueAPIKey != "" { + SendSIB(GenerateSIBResolvedSMS(service)) + } +} + +func SendHostErrorResolvedNotification(service string) { + if conf.TwilioSID != "" && conf.TwilioToken != "" { + SendTwilioSMS(GenerateTwilioHostErrorResolvedSMS(service)) } if conf.SendInBlueAPIKey != "" { SendSIB(GenerateSIBResolvedSMS(service)) diff --git a/sms/twilio.go b/sms/twilio.go index aefce39..2575390 100644 --- a/sms/twilio.go +++ b/sms/twilio.go @@ -1,18 +1,19 @@ package sms import ( - "github.com/kevinburke/twilio-go" "log" "strings" "time" + + "github.com/kevinburke/twilio-go" ) type TwilioSMS struct { - To string + To string Body string } -func GenerateTwilioErrorSMS(errs []error, service string) []*TwilioSMS { +func GenerateTwilioServiceErrorSMS(errs []error, service string) []*TwilioSMS { var smsArr []*TwilioSMS for _, recipient := range conf.To { @@ -29,7 +30,51 @@ func GenerateTwilioErrorSMS(errs []error, service string) []*TwilioSMS { } } smsArr = append(smsArr, &TwilioSMS{ - To: recipient, + To: recipient, + Body: strings.Join(lines, "\n"), + }) + } + + return smsArr +} + +func GenerateTwilioHostErrorSMS(errs []error, service string) []*TwilioSMS { + + var smsArr []*TwilioSMS + for _, recipient := range conf.To { + + var lines = []string{ + "Dear Admin,", + "An error with the host " + strings.ToUpper(service) + " occurred:", + "Timestamp: " + time.Now().Format(timestampFormat), + } + if len(errs) > 0 { + lines = append(lines, "Errors: ") + for _, e := range errs { + lines = append(lines, e.Error()) + } + } + smsArr = append(smsArr, &TwilioSMS{ + To: recipient, + Body: strings.Join(lines, "\n"), + }) + } + + return smsArr +} + +func GenerateTwilioServiceErrorResolvedSMS(service string) []*TwilioSMS { + + var smsArr []*TwilioSMS + for _, recipient := range conf.To { + + var lines = []string{ + "Dear Admin,", + "Service " + strings.ToUpper(service) + " is back to normal operation", + "Timestamp: " + time.Now().Format(timestampFormat), + } + smsArr = append(smsArr, &TwilioSMS{ + To: recipient, Body: strings.Join(lines, "\n"), }) } @@ -37,18 +82,18 @@ func GenerateTwilioErrorSMS(errs []error, service string) []*TwilioSMS { return smsArr } -func GenerateTwilioResolvedSMS(service string) []*TwilioSMS { +func GenerateTwilioHostErrorResolvedSMS(service string) []*TwilioSMS { var smsArr []*TwilioSMS for _, recipient := range conf.To { var lines = []string{ "Dear Admin,", - "service " + strings.ToUpper(service) + " is back to normal operation", + "Host " + strings.ToUpper(service) + " is back to normal operation", "Timestamp: " + time.Now().Format(timestampFormat), } smsArr = append(smsArr, &TwilioSMS{ - To: recipient, + To: recipient, Body: strings.Join(lines, "\n"), }) } diff --git a/watch/notifiers.go b/watch/notifiers.go index 60219cc..35266c5 100644 --- a/watch/notifiers.go +++ b/watch/notifiers.go @@ -3,12 +3,13 @@ package watch import ( "errors" "fmt" + "github.com/foomo/petze/mail" "github.com/foomo/petze/slack" "github.com/foomo/petze/sms" ) -func (w *Watcher) smsNotify(r *Result) { +func (w *Watcher) smsNotify(r *Result, isService bool, serviceOrHostid string, notifyIfResolved bool) { // if SMS notifications are not enabled, return immediately if !sms.IsInitialized() { @@ -29,7 +30,11 @@ func (w *Watcher) smsNotify(r *Result) { if !w.didReceiveSMSNotification || w.didErrorsChange(r) { go func() { - sms.SendErrors(errs, w.service.ID) + if isService { + sms.SendServiceErrors(errs, serviceOrHostid) + } else { + sms.SendHostErrors(errs, serviceOrHostid) + } }() w.didReceiveSMSNotification = true w.lastErrors = r.Errors @@ -41,16 +46,20 @@ func (w *Watcher) smsNotify(r *Result) { w.didReceiveSMSNotification = false w.lastErrors = []Error{} - if w.service.NotifyIfResolved { + if notifyIfResolved { go func() { - sms.SendResolvedNotification(w.service.ID) + if isService { + sms.SendServiceErrorResolvedNotification(serviceOrHostid) + } else { + sms.SendHostErrorResolvedNotification(serviceOrHostid) + } }() } } } } -func (w *Watcher) mailNotify(r *Result) { +func (w *Watcher) mailNotify(r *Result, isService bool, serviceOrHostid string, notifyIfResolved bool) { // if SMTP notifications are not enabled, return immediately if !mail.IsInitialized() { @@ -71,7 +80,11 @@ func (w *Watcher) mailNotify(r *Result) { if !w.didReceiveMailNotification || w.didErrorsChange(r) { go func() { - mail.SendMails("Error for Service: "+w.service.ID, mail.GenerateErrorMail(errs, "", w.service.ID)) + if isService { + mail.SendMails("Error for Service: "+serviceOrHostid, mail.GenerateServiceErrorMail(errs, "", serviceOrHostid)) + } else { + mail.SendMails("Error for Host: "+serviceOrHostid, mail.GenerateHostErrorMail(errs, "", serviceOrHostid)) + } }() w.didReceiveMailNotification = true w.lastErrors = r.Errors @@ -83,16 +96,20 @@ func (w *Watcher) mailNotify(r *Result) { w.didReceiveMailNotification = false w.lastErrors = []Error{} - if w.service.NotifyIfResolved { + if notifyIfResolved { go func() { - mail.SendMails("Issues resolved for service: "+w.service.ID, mail.GenerateResolvedNotificationMail(w.service.ID)) + if isService { + mail.SendMails("Issues resolved for service: "+serviceOrHostid, mail.GenerateServiceResolvedNotificationMail(serviceOrHostid)) + } else { + mail.SendMails("Issues resolved for host: "+serviceOrHostid, mail.GenerateHostResolvedNotificationMail(serviceOrHostid)) + } }() } } } } -func (w *Watcher) slackNotify(r *Result) { +func (w *Watcher) slackNotify(r *Result, isService bool, serviceOrHostid string, notifyIfResolved bool) { // if Slack notifications are not enabled, return immediately if !slack.IsInitialized() { @@ -112,7 +129,11 @@ func (w *Watcher) slackNotify(r *Result) { } if !w.didReceiveSlackNotification || w.didErrorsChange(r) { go func() { - slack.Send(slack.GenerateErrorMessage(errs, w.service.ID)) + if isService { + slack.Send(slack.GenerateServiceErrorMessage(errs, serviceOrHostid)) + } else { + slack.Send(slack.GenerateHostErrorMessage(errs, serviceOrHostid)) + } }() w.didReceiveSlackNotification = true w.lastErrors = r.Errors @@ -124,9 +145,13 @@ func (w *Watcher) slackNotify(r *Result) { w.didReceiveSlackNotification = false w.lastErrors = []Error{} - if w.service.NotifyIfResolved { + if notifyIfResolved { go func() { - slack.Send(slack.GenerateResolvedNotification(w.service.ID)) + if isService { + slack.Send(slack.GenerateServiceErrorResolvedNotification(serviceOrHostid)) + } else { + slack.Send(slack.GenerateHostErrorResolvedNotification(serviceOrHostid)) + } }() } } diff --git a/watch/session.go b/watch/session.go index 63e773b..e609250 100644 --- a/watch/session.go +++ b/watch/session.go @@ -21,10 +21,10 @@ import ( var userAgent = "Petze Service Monitor" func SetUserAgentVersion(version string) { - userAgent += "/"+version + userAgent += "/" + version } -func (w *Watcher) runSession(r *Result, client *http.Client) error { +func (w *ServiceWatcher) runSession(r *ServiceResult, client *http.Client) error { //log.Println("running session with session length:", len(service.Session)) //spew.Dump(service) diff --git a/watch/watch.go b/watch/watch.go index d5a6c2c..10cd575 100644 --- a/watch/watch.go +++ b/watch/watch.go @@ -12,6 +12,8 @@ import ( "strings" "time" + "github.com/go-ping/ping" + "reflect" "github.com/foomo/petze/config" @@ -33,6 +35,8 @@ type ErrorType string const ( ErrorInvalidEndpoint ErrorType = "endpointInvalid" + ErrorHostLookup = "hostLookupFailure" + ErrorHostUnreachable = "hostUnreachable" ErrorTypeServerTooSlow = "serverTooSlow" ErrorTypeNotImplemented = "notImplemented" ErrorTypeUnknownError = "unknownError" @@ -73,16 +77,40 @@ type Result struct { RunTime time.Duration `json:"runtime"` } -func NewResult(id string) *Result { - return &Result{ - ID: id, - Errors: []Error{}, - Timestamp: time.Now(), +type ServiceResult struct { + Result +} + +type HostResult struct { + Result +} + +func NewServiceResult(id string) *ServiceResult { + return &ServiceResult{ + Result: Result{ + ID: id, + Errors: []Error{}, + Timestamp: time.Now(), + }, + } +} + +func NewHostResult(id string) *HostResult { + return &HostResult{ + Result: Result{ + ID: id, + Errors: []Error{}, + Timestamp: time.Now(), + }, } } -func (r *Result) addError(e error, t ErrorType, comment string) { - r.Errors = addError(r.Errors, e, t, comment) +func (serviceResult *ServiceResult) addError(e error, t ErrorType, comment string) { + serviceResult.Errors = addError(serviceResult.Errors, e, t, comment) +} + +func (hostResult *HostResult) addError(e error, t ErrorType, comment string) { + hostResult.Errors = addError(hostResult.Errors, e, t, comment) } func addError(errors []Error, err error, t ErrorType, comment string) []Error { @@ -106,8 +134,7 @@ type dialerErrRecorder struct { } type Watcher struct { - active bool - service *config.Service + active bool // notifications didReceiveMailNotification bool @@ -116,15 +143,40 @@ type Watcher struct { lastErrors []Error } -// Watch create a watcher and start watching -func Watch(service *config.Service, chanResult chan Result) *Watcher { +type ServiceWatcher struct { + Watcher + service *config.Service +} + +type HostWatcher struct { + Watcher + host *config.Host +} + +// Watch create a service watcher and start watching +func WatchService(service *config.Service, chanServiceResult chan ServiceResult, chanHostResult chan HostResult, hosts map[string]*config.Host) *ServiceWatcher { - w := &Watcher{ - active: true, + serviceWatcher := &ServiceWatcher{ + Watcher: Watcher{ + active: true, + }, service: service, } - go w.watchLoop(chanResult) - return w + go serviceWatcher.serviceWatchLoop(chanServiceResult, chanHostResult, hosts) + return serviceWatcher +} + +// Create a host watcher and start watching +func WatchHost(host *config.Host, chanResult chan HostResult) *HostWatcher { + + hostWatcher := &HostWatcher{ + Watcher: Watcher{ + active: true, + }, + host: host, + } + go hostWatcher.hostWatchLoop(chanResult) + return hostWatcher } // Stop watching - beware this is async @@ -139,29 +191,55 @@ func (w *Watcher) LastErrors() []Error { return nil } -func (w *Watcher) SetLastErrors(errs []Error) { - w.lastErrors = errs +func (serviceWatcher *ServiceWatcher) SetLastErrors(errs []Error) { + serviceWatcher.lastErrors = errs +} + +func (hostWatcher *HostWatcher) SetLastErrors(errs []Error) { + hostWatcher.lastErrors = errs } -func (w *Watcher) watchLoop(chanResult chan Result) { - httpClient, errRecorder := w.getClientAndDialErrRecorder() +func (serviceWatcher *ServiceWatcher) serviceWatchLoop(chanServiceResult chan ServiceResult, chanHostResult chan HostResult, hosts map[string]*config.Host) { + + httpClient, errRecorder := serviceWatcher.getClientAndDialErrRecorder() - for w.active { - r := w.watch(httpClient, errRecorder) - if w.active { + for serviceWatcher.Watcher.active { + r := serviceWatcher.watchService(httpClient, errRecorder) + if serviceWatcher.active { // send notifications - w.smsNotify(r) - w.mailNotify(r) - w.slackNotify(r) + serviceWatcher.smsNotify(&r.Result, true, serviceWatcher.service.ID, serviceWatcher.service.NotifyIfResolved) + serviceWatcher.mailNotify(&r.Result, true, serviceWatcher.service.ID, serviceWatcher.service.NotifyIfResolved) + serviceWatcher.slackNotify(&r.Result, true, serviceWatcher.service.ID, serviceWatcher.service.NotifyIfResolved) - chanResult <- *r - time.Sleep(w.service.Interval) + chanServiceResult <- *r + time.Sleep(serviceWatcher.service.Interval) } } } -func (w *Watcher) getClientAndDialErrRecorder() (client *http.Client, errRecorder *dialerErrRecorder) { +func (hostWatcher *HostWatcher) hostWatchLoop(chanHostResult chan HostResult) { + + errRecorder := &dialerErrRecorder{ + errors: []Error{}, + } + + for hostWatcher.active { + r := hostWatcher.watchHost(errRecorder) + if hostWatcher.active { + + // send notifications + hostWatcher.smsNotify(&r.Result, false, hostWatcher.host.ID, hostWatcher.host.NotifyIfResolved) + hostWatcher.mailNotify(&r.Result, false, hostWatcher.host.ID, hostWatcher.host.NotifyIfResolved) + hostWatcher.slackNotify(&r.Result, false, hostWatcher.host.ID, hostWatcher.host.NotifyIfResolved) + + chanHostResult <- *r + time.Sleep(hostWatcher.host.Interval) + } + } +} + +func (serviceWatcher *ServiceWatcher) getClientAndDialErrRecorder() (client *http.Client, errRecorder *dialerErrRecorder) { errRecorder = &dialerErrRecorder{ errors: []Error{}, } @@ -177,15 +255,15 @@ func (w *Watcher) getClientAndDialErrRecorder() (client *http.Client, errRecorde connectionState := tlsConn.ConnectionState() for _, cert := range connectionState.PeerCertificates { durationUntilExpiry := cert.NotAfter.Sub(time.Now()) - if durationUntilExpiry < w.service.TLSWarning { + if durationUntilExpiry < serviceWatcher.service.TLSWarning { var ( - prefix = "cert CN=\"" + cert.Subject.CommonName + prefix = "cert CN=\"" + cert.Subject.CommonName certErr = Error{ - Error: errors.New( + Error: errors.New( fmt.Sprint( "cert CN=\"", cert.Subject.CommonName, - "\" is expiring in less than "+strconv.FormatFloat(w.service.TLSWarning.Hours(), 'f', 0, 64)+"h: ", + "\" is expiring in less than "+strconv.FormatFloat(serviceWatcher.service.TLSWarning.Hours(), 'f', 0, 64)+"h: ", cert.NotAfter, ", left: ", strconv.FormatFloat(durationUntilExpiry.Hours(), 'f', 0, 64), @@ -280,16 +358,16 @@ func (w *Watcher) getClientAndDialErrRecorder() (client *http.Client, errRecorde return } -// actual watch -func (w *Watcher) watch(client *http.Client, errRecorder *dialerErrRecorder) (r *Result) { +// actual service watch +func (serviceWatcher *ServiceWatcher) watchService(client *http.Client, errRecorder *dialerErrRecorder) (serviceResult *ServiceResult) { - r = NewResult(w.service.ID) + serviceResult = NewServiceResult(serviceWatcher.service.ID) // parsing, the endpoint - request, err := http.NewRequest("GET", w.service.Endpoint, nil) + request, err := http.NewRequest("GET", serviceWatcher.service.Endpoint, nil) if err != nil { - r.addError(err, ErrorInvalidEndpoint, "") - return r + serviceResult.addError(err, ErrorInvalidEndpoint, "") + return serviceResult } // my personal dns error check if len(request.Host) > 0 { @@ -298,20 +376,20 @@ func (w *Watcher) watch(client *http.Client, errRecorder *dialerErrRecorder) (r if len(parts) > 1 { host, _, err = net.SplitHostPort(request.Host) if err != nil { - r.addError(err, ErrorInvalidEndpoint, "") + serviceResult.addError(err, ErrorInvalidEndpoint, "") return } } _, lookupErr := net.LookupIP(host) if lookupErr != nil { - r.addError(lookupErr, ErrorTypeDNS, "") + serviceResult.addError(lookupErr, ErrorTypeDNS, "") return } } // i am explicitly not calling http.Get, because it does 30x handling and i do not want that response, err := client.Do(request) - r.Errors = append(r.Errors, errRecorder.errors...) + serviceResult.Errors = append(serviceResult.Errors, errRecorder.errors...) if response != nil && response.Body != nil { // always close the body @@ -320,31 +398,31 @@ func (w *Watcher) watch(client *http.Client, errRecorder *dialerErrRecorder) (r if err != nil { // sth. went wrong - r.addError(err, ErrorTypeClientError, "") + serviceResult.addError(err, ErrorTypeClientError, "") var netErr net.Error switch true { case errRecorder.tlsHostnameError != nil: - r.addError(errRecorder.tlsHostnameError, ErrorTypeTLSHostNameError, "") + serviceResult.addError(errRecorder.tlsHostnameError, ErrorTypeTLSHostNameError, "") case errRecorder.tlsSystemRootsError != nil: - r.addError(errRecorder.tlsSystemRootsError, ErrorTypeTLSSystemRootsError, "") + serviceResult.addError(errRecorder.tlsSystemRootsError, ErrorTypeTLSSystemRootsError, "") case errRecorder.tlsUnknownAuthorityError != nil: - r.addError(errRecorder.tlsUnknownAuthorityError, ErrorTypeTLSUnknownAuthority, "") + serviceResult.addError(errRecorder.tlsUnknownAuthorityError, ErrorTypeTLSUnknownAuthority, "") case errRecorder.tlsCertificateInvalidError != nil: - r.addError(errRecorder.tlsCertificateInvalidError, ErrorTypeTLSCertificateInvalid, "") + serviceResult.addError(errRecorder.tlsCertificateInvalidError, ErrorTypeTLSCertificateInvalid, "") case errRecorder.unknownErr != nil: - r.addError(errRecorder.unknownErr, ErrorTypeUnknownError, "") + serviceResult.addError(errRecorder.unknownErr, ErrorTypeUnknownError, "") case errRecorder.dnsConfigError != nil: netErr = errRecorder.dnsConfigError - r.addError(errRecorder.dnsConfigError, ErrorTypeDNSConfig, "") + serviceResult.addError(errRecorder.dnsConfigError, ErrorTypeDNSConfig, "") case errRecorder.dnsError != nil: netErr = errRecorder.dnsError - r.addError(errRecorder.dnsError, ErrorTypeDNS, "") + serviceResult.addError(errRecorder.dnsError, ErrorTypeDNS, "") case errRecorder.err != nil: netErr = errRecorder.err - r.addError(errRecorder.err, ErrorTypeUnknownError, "") + serviceResult.addError(errRecorder.err, ErrorTypeUnknownError, "") } if netErr != nil { - r.Timeout = netErr.Timeout() + serviceResult.Timeout = netErr.Timeout() } return } @@ -352,14 +430,50 @@ func (w *Watcher) watch(client *http.Client, errRecorder *dialerErrRecorder) (r // prepare to run the session with cookies cookieJar, _ := cookiejar.New(nil) client.Jar = cookieJar - errSession := w.runSession(r, client) + errSession := serviceWatcher.runSession(serviceResult, client) if errSession != nil { log.Error("session error", errSession) - r.addError(errSession, ErrorTypeSessionFail, "") + serviceResult.addError(errSession, ErrorTypeSessionFail, "") + } + serviceResult.RunTime = time.Since(serviceResult.Timestamp) + + return +} + +// actual host watch +func (hostWatcher *HostWatcher) watchHost(errRecorder *dialerErrRecorder) (hostResult *HostResult) { + + hostResult = NewHostResult(hostWatcher.host.ID) + + pinger, err := ping.NewPinger(hostWatcher.host.Hostname) + + if err != nil { + hostResult.addError(err, ErrorHostLookup, "") + return hostResult + } + + pinger.Count = 1 + pinger.Interval = hostWatcher.host.Interval + pinger.Timeout = hostWatcher.host.Timeout + pinger.SetPrivileged(true) + + fmt.Println(hostWatcher.host.Hostname) + + pinger.OnRecv = func(pkt *ping.Packet) { + hostResult.RunTime = pkt.Rtt + } + pinger.OnFinish = func(stats *ping.Statistics) { + if stats.PacketsRecv == 0 { + hostResult.addError(errors.New("ICMP packet to: "+hostWatcher.host.Hostname+" was lost"), ErrorHostUnreachable, "") + } + } + + err = pinger.Run() // blocking + + if err != nil { + hostResult.addError(err, ErrorHostUnreachable, "") + return hostResult } - r.RunTime = time.Since(r.Timestamp) - // r.addError(errors.New(fmt.Sprint("response time too slow:", runTimeMilliseconds, ", should not be more than:", maxRuntime)), ErrorTypeServerTooSlow) - //r.StatusCode = response.StatusCode return }