// Package apppulse re-exports the entry points of the health package so that
// callers only need a single import.
package apppulse

import (
	"context"

	"github.com/rollicks-c/apppulse/health"
)

// Run delegates to health.Run, forwarding ctx and all watchdog options
// unchanged.
func Run(ctx context.Context, options ...health.WatchDogOption) {
	health.Run(ctx, options...)
}

// GetStatus delegates to health.GetStatus and returns its result as is.
func GetStatus() health.Status {
	status := health.GetStatus()
	return status
}

// Register delegates to health.Register and hands back the Reporter it
// produces for the check called name.
func Register(name string, options ...health.Option) health.Reporter {
	reporter := health.Register(name, options...)
	return reporter
}

// Report delegates to health.Report, recording err as the latest outcome of
// the check called name.
func Report(name string, err error) {
	health.Report(name, err)
}
// Package health tracks named health checks and aggregates their latest
// outcomes into a single Status.
package health

import (
	"context"
	"fmt"
	"time"
)

// Reporter records the latest outcome of one health check. A nil error marks
// the check as passing.
type Reporter func(err error)

// Status is the aggregated health snapshot returned by GetStatus.
type Status struct {
	// HasError is true while at least one check is failing or recovering.
	HasError bool
	// Checks maps each check name to "OK", "recovering..." or the text of
	// its current error.
	Checks map[string]string
	// Status is the overall state: "OK", "ERROR" or "RECOVERING".
	Status string
}

// WatchDogOption customizes the watchdog started by Run.
type WatchDogOption func(wd *watchDog)

// WithWatchDogInterval sets how often the watchdog performs its time-based
// updates.
func WithWatchDogInterval(d time.Duration) WatchDogOption {
	return func(wd *watchDog) {
		wd.interval = d
	}
}

// Run builds a watchdog with a default interval of five seconds, applies the
// given options in order and runs it on the calling goroutine. It returns
// once ctx is done.
func Run(ctx context.Context, options ...WatchDogOption) {
	wd := &watchDog{
		interval: time.Second * 5,
	}
	for _, option := range options {
		option(wd)
	}
	wd.Run(ctx)
}

// GetStatus returns the most recently computed aggregated status. Before the
// first computation it returns the zero Status (empty Status string, nil
// Checks).
//
// The read is performed under dataLock: Report and the watchdog replace
// data.Status while holding that lock, so an unguarded read would be a data
// race.
func GetStatus() Status {
	dataLock.Lock()
	defer dataLock.Unlock()

	return data.Status
}

// Option customizes a health check at registration time.
type Option func(*healthCheck)

// WithGracePeriod sets how long after its last report a check stays in the
// recovering state before the watchdog marks it healthy again.
func WithGracePeriod(d time.Duration) Option {
	return func(hc *healthCheck) {
		hc.GracePeriod = d
	}
}

// WithAutoFailAfter makes the watchdog fail the check automatically once no
// report has been received for longer than d.
func WithAutoFailAfter(d time.Duration) Option {
	return func(hc *healthCheck) {
		hc.AutoFailAfter = &d
	}
}

// Register creates or reconfigures the check called name and returns a
// Reporter bound to that name.
//
// A new check starts with a grace period of one nanosecond and the error
// "not yet checked". When a check with the same name already exists, its
// last report time, recovering flag and error are carried over, so only the
// configuration supplied via options is replaced.
func Register(name string, options ...Option) Reporter {
	dataLock.Lock()
	defer dataLock.Unlock()

	if data.Checks == nil {
		data.Checks = make(map[string]healthCheck)
	}

	check := healthCheck{
		Name:        name,
		GracePeriod: time.Nanosecond * 1,
		Error:       fmt.Errorf("not yet checked"),
	}
	for _, option := range options {
		option(&check)
	}

	// Keep the runtime state of an already known check.
	if prev, ok := data.Checks[name]; ok {
		check.LastCheck = prev.LastCheck
		check.IsRecovering = prev.IsRecovering
		check.Error = prev.Error
	}
	data.Checks[name] = check

	// The aggregated status is not recomputed on registration (call left
	// disabled); it is refreshed by the next Report or watchdog pass.
	//data.Status = updateStatus(data)

	return func(err error) {
		Report(name, err)
	}
}

// Report records err as the latest outcome of the check called name and
// recomputes the aggregated status.
//
// An unknown name is added on the fly as a check without auto-fail and with a
// zero grace period. A check whose previous outcome was an error and that now
// reports nil is flagged as recovering.
func Report(name string, err error) {
	dataLock.Lock()
	defer dataLock.Unlock()

	if data.Checks == nil {
		data.Checks = make(map[string]healthCheck)
	}

	check, ok := data.Checks[name]
	if !ok {
		check = healthCheck{
			Name:          name,
			AutoFailAfter: nil,
		}
	}

	check.LastCheck = time.Now()
	// Only the transition from failing to passing counts as recovering.
	check.IsRecovering = false
	if check.Error != nil && err == nil {
		check.IsRecovering = true
	}
	check.Error = err

	data.Checks[name] = check
	data.Status = updateStatus(data)
}
package health

import (
	"sync"
	"time"

	"github.com/rs/zerolog/log"
)

var (
	// dataLock guards data.
	dataLock = sync.Mutex{}
	// data holds all registered checks and the last aggregated status.
	data = healthData{}
)

// healthData is the package-wide health state.
type healthData struct {
	// Checks holds every known check, keyed by its name.
	Checks map[string]healthCheck
	// Status is the aggregate most recently produced by updateStatus.
	Status Status
}

// healthCheck is the state and configuration of a single named check.
type healthCheck struct {
	Name string
	// Error is the current failure, or nil when the check passes.
	Error error
	// IsRecovering marks a check that reported success after a failure and
	// has not yet been confirmed healthy.
	IsRecovering bool
	// AutoFailAfter, when non-nil, is the maximum time without a report
	// before the watchdog fails the check.
	AutoFailAfter *time.Duration
	// GracePeriod is how long a check remains recovering after its last
	// report.
	GracePeriod time.Duration
	// LastCheck is the time of the most recent report; zero if none.
	LastCheck time.Time
}

// updateStatus derives the aggregated Status from the checks in db.
//
// Each check contributes its error text, "recovering..." or "OK" to the
// per-check map. The overall Status string is "ERROR" if any check is
// failing, otherwise "RECOVERING" if any check is recovering, otherwise "OK".
// "ERROR" always wins over "RECOVERING" so that the result does not depend on
// the random iteration order of the checks map.
//
// When db.Status reported an error and the new status does not, a
// "health recovered" message is logged.
func updateStatus(db healthData) Status {
	hadError := db.Status.HasError

	status := Status{
		HasError: false,
		Checks:   make(map[string]string, len(db.Checks)),
		Status:   "OK",
	}

	for _, check := range db.Checks {
		switch {
		case check.Error != nil:
			status.HasError = true
			status.Status = "ERROR"
			status.Checks[check.Name] = check.Error.Error()
		case check.IsRecovering:
			status.HasError = true
			// Never downgrade an ERROR set by another check.
			if status.Status != "ERROR" {
				status.Status = "RECOVERING"
			}
			status.Checks[check.Name] = "recovering..."
		default:
			status.Checks[check.Name] = "OK"
		}
	}

	// recover from error
	if hadError && !status.HasError {
		log.Info().Msg("health recovered")
	}

	return status
}
package health

import (
	"context"
	"fmt"
	"time"
)

// watchDogFallbackInterval is used when a watchdog is configured with a
// non-positive interval, which time.NewTicker would reject with a panic.
const watchDogFallbackInterval = 5 * time.Second

// watchDog periodically applies the time-based state transitions of all
// registered checks.
type watchDog struct {
	// interval is the time between two update passes.
	interval time.Duration
}

// Run performs an update pass immediately and then once per interval until
// ctx is done. It blocks the calling goroutine.
//
// A non-positive interval is replaced by watchDogFallbackInterval instead of
// letting time.NewTicker panic.
func (wd watchDog) Run(ctx context.Context) {
	interval := wd.interval
	if interval <= 0 {
		interval = watchDogFallbackInterval
	}

	// setup timer
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		// invoke time-based updates
		wd.autoFail()
		wd.recoverFromError()

		// await next tick
		select {
		case <-ticker.C:
			continue
		case <-ctx.Done():
			return
		}
	}
}

// autoFail marks every check with an auto-fail timeout as failed when its
// last report is older than that timeout, then recomputes the aggregated
// status.
func (wd watchDog) autoFail() {
	dataLock.Lock()
	defer dataLock.Unlock()

	now := time.Now()
	for name, check := range data.Checks {
		if check.AutoFailAfter == nil {
			continue
		}
		if check.LastCheck.Add(*check.AutoFailAfter).Before(now) {
			check.Error = fmt.Errorf("no report since %s", check.LastCheck.Format(time.RFC1123))
			// A timed-out check is failing, not recovering. Leaving the
			// flag set would let recoverFromError wipe this error once the
			// grace period has elapsed.
			check.IsRecovering = false
			data.Checks[name] = check
		}
	}
	data.Status = updateStatus(data)
}

// recoverFromError promotes every recovering check whose grace period has
// elapsed since its last report back to healthy, then recomputes the
// aggregated status.
func (wd watchDog) recoverFromError() {
	dataLock.Lock()
	defer dataLock.Unlock()

	now := time.Now()
	for name, check := range data.Checks {
		if !check.IsRecovering {
			continue
		}
		if check.LastCheck.Add(check.GracePeriod).Before(now) {
			check.Error = nil
			check.IsRecovering = false
			data.Checks[name] = check
		}
	}
	data.Status = updateStatus(data)
}