Add dbx library: PostgreSQL cluster with master/replica routing, retry, health checking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
90
health.go
Normal file
90
health.go
Normal file
@@ -0,0 +1,90 @@
|
||||
package dbx
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
)
|
||||
|
||||
// healthChecker periodically pings nodes and updates their health state.
|
||||
type healthChecker struct {
|
||||
nodes []*Node
|
||||
cfg HealthCheckConfig
|
||||
logger Logger
|
||||
metrics *MetricsHook
|
||||
stop chan struct{}
|
||||
done chan struct{}
|
||||
}
|
||||
|
||||
func newHealthChecker(nodes []*Node, cfg HealthCheckConfig, logger Logger, metrics *MetricsHook) *healthChecker {
|
||||
return &healthChecker{
|
||||
nodes: nodes,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
metrics: metrics,
|
||||
stop: make(chan struct{}),
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
func (h *healthChecker) start() {
|
||||
go h.loop()
|
||||
}
|
||||
|
||||
func (h *healthChecker) loop() {
|
||||
defer close(h.done)
|
||||
|
||||
ticker := time.NewTicker(h.cfg.Interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-h.stop:
|
||||
return
|
||||
case <-ticker.C:
|
||||
h.checkAll()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *healthChecker) checkAll() {
|
||||
for _, node := range h.nodes {
|
||||
h.checkNode(node)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *healthChecker) checkNode(n *Node) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), h.cfg.Timeout)
|
||||
defer cancel()
|
||||
|
||||
err := n.pool.Ping(ctx)
|
||||
wasHealthy := n.healthy.Load()
|
||||
|
||||
if err != nil {
|
||||
n.healthy.Store(false)
|
||||
if wasHealthy {
|
||||
h.logger.Error(ctx, "dbx: node is down",
|
||||
"node", n.name,
|
||||
"error", err,
|
||||
)
|
||||
if h.metrics != nil && h.metrics.OnNodeDown != nil {
|
||||
h.metrics.OnNodeDown(ctx, n.name, err)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
n.healthy.Store(true)
|
||||
if !wasHealthy {
|
||||
h.logger.Info(ctx, "dbx: node is up",
|
||||
"node", n.name,
|
||||
)
|
||||
if h.metrics != nil && h.metrics.OnNodeUp != nil {
|
||||
h.metrics.OnNodeUp(ctx, n.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *healthChecker) shutdown() {
|
||||
close(h.stop)
|
||||
<-h.done
|
||||
}
|
||||
Reference in New Issue
Block a user