package health import ( "context" "fmt" "net/http" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "go.uber.org/zap" "github.com/guildhouse-co/kedge/internal/config" ) // PeerCounter is implemented by the mesh manager. type PeerCounter interface { PeerCount() int } // VLANCounter is implemented by the VLAN manager. type VLANCounter interface { VLANCount() int } var ( meshPeerGauge = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "kedge", Subsystem: "mesh", Name: "peer_count", Help: "Number of active WireGuard mesh peers", }) vlanInterfaceGauge = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "kedge", Subsystem: "vlan", Name: "interface_count", Help: "Number of managed VLAN interfaces", }) sessionTransitCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "kedge", Subsystem: "quartermaster", Name: "session_transits_total", Help: "Total SessionTransitArtifacts submitted to Quartermaster", }) networkMutationCounter = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "kedge", Subsystem: "quartermaster", Name: "network_mutations_total", Help: "Total NetworkMutationArtifacts submitted to Quartermaster", }) tunnelStatusGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: "kedge", Subsystem: "mesh", Name: "tunnel_up", Help: "WireGuard tunnel status (1=up, 0=down)", }, []string{"peer"}) ) func init() { prometheus.MustRegister( meshPeerGauge, vlanInterfaceGauge, sessionTransitCounter, networkMutationCounter, tunnelStatusGauge, ) } // Server serves health checks and Prometheus metrics. type Server struct { cfg config.HealthConfig peers PeerCounter vlans VLANCounter log *zap.SugaredLogger } // NewServer creates a new health and metrics server. 
func NewServer(cfg config.HealthConfig, peers PeerCounter, vlans VLANCounter, log *zap.SugaredLogger) *Server {
	return &Server{
		cfg:   cfg,
		peers: peers,
		vlans: vlans,
		log:   log,
	}
}

// Run starts the HTTP server for health checks and metrics on
// s.cfg.ListenAddr and blocks until it stops.
//
// Routes:
//   - /healthz: always answers 200 "ok".
//   - /readyz:  refreshes the peer/VLAN gauges, then answers 200 "ok".
//   - /metrics: refreshes the peer/VLAN gauges, then serves the default
//     Prometheus registry.
//
// Cancelling ctx closes the server, in which case Run returns nil. Any other
// listener failure is returned wrapped.
func (s *Server) Run(ctx context.Context) error {
	scrape := promhttp.Handler()

	mux := http.NewServeMux()
	mux.HandleFunc("/healthz", s.handleHealthz)
	mux.HandleFunc("/readyz", s.handleReadyz)
	// Refresh the gauges on every scrape so /metrics does not report stale
	// values when nothing is probing /readyz.
	mux.Handle("/metrics", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		s.refreshGauges()
		scrape.ServeHTTP(w, r)
	}))

	// NOTE(review): no read/write timeouts are configured on this server;
	// consider setting ReadHeaderTimeout if the listener is reachable by
	// untrusted clients.
	srv := &http.Server{
		Addr:    s.cfg.ListenAddr,
		Handler: mux,
	}

	// stopped is closed when Run returns so the watcher goroutine below
	// exits even if ListenAndServe fails before ctx is ever cancelled.
	stopped := make(chan struct{})
	defer close(stopped)

	go func() {
		select {
		case <-ctx.Done():
			if err := srv.Close(); err != nil {
				s.log.Warnf("health server close: %v", err)
			}
		case <-stopped:
		}
	}()

	s.log.Infof("health server listening on %s", s.cfg.ListenAddr)
	// ListenAndServe always returns a non-nil error; ErrServerClosed is the
	// expected result of the Close call above.
	if err := srv.ListenAndServe(); err != http.ErrServerClosed {
		return fmt.Errorf("health server error: %w", err)
	}
	return nil
}

// refreshGauges copies the current peer and VLAN counts into their gauges.
// A nil counter is skipped, leaving its gauge untouched.
func (s *Server) refreshGauges() {
	if s.peers != nil {
		meshPeerGauge.Set(float64(s.peers.PeerCount()))
	}
	if s.vlans != nil {
		vlanInterfaceGauge.Set(float64(s.vlans.VLANCount()))
	}
}

// handleHealthz is the liveness endpoint: it unconditionally answers
// 200 with the body "ok".
func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) {
	w.WriteHeader(http.StatusOK)
	// Best effort: a failed write means the client has gone away.
	_, _ = fmt.Fprintln(w, "ok")
}

// handleReadyz is the readiness endpoint. It refreshes the peer/VLAN gauges
// and then unconditionally answers 200 with the body "ok".
func (s *Server) handleReadyz(w http.ResponseWriter, r *http.Request) {
	s.refreshGauges()

	w.WriteHeader(http.StatusOK)
	// Best effort: a failed write means the client has gone away.
	_, _ = fmt.Fprintln(w, "ok")
}

// RecordSessionTransit increments the session transit counter.
func RecordSessionTransit() {
	sessionTransitCounter.Inc()
}

// RecordNetworkMutation increments the network mutation counter.
func RecordNetworkMutation() {
	networkMutationCounter.Inc()
}