Files
tyto/backend/internal/api/routes.go
vikingowl 80f6e788f4 feat: implement server hub for multi-device agent management
Server Package (internal/server/):
- Registry: Agent registration with approval workflow, persistence
- Hub: Connection manager for connected agents, message routing
- GRPCServer: mTLS-enabled gRPC server with interceptors
- SSEBridge: Bridges agent metrics to browser SSE clients

Registry Features:
- JSON file-based persistence
- Agent lifecycle: pending -> approved -> connected -> offline
- Revocation support for certificate-based agent removal
- Automatic last-seen tracking

Hub Features:
- Bidirectional gRPC stream handling
- MetricsSubscriber interface for metric distribution
- Stale connection detection and cleanup
- Broadcast and per-agent command sending

gRPC Server:
- Unary and stream interceptors for auth
- Agent ID extraction from mTLS certificates
- Delegation to Hub for business logic

Agent Management API:
- GET/DELETE /api/v1/agents - List/remove agents
- GET /api/v1/agents/pending - Pending approvals
- POST /api/v1/agents/pending/:id/approve|reject
- GET /api/v1/agents/:id/metrics - Latest agent metrics
- GET /api/v1/agents/connected - Connected agents

Server Mode Startup:
- Full initialization of registry, hub, gRPC, SSE bridge
- Graceful shutdown with signal handling
- Agent mode now uses the agent package

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 07:53:23 +01:00

501 lines
15 KiB
Go

package api
import (
"encoding/json"
"fmt"
"net/http"
"os"
"strconv"
"strings"
"sync"
"syscall"
"time"
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
"tyto/internal/config"
"tyto/internal/models"
"tyto/internal/sse"
)
// Server is the HTTP API server. It bundles the Gin router with the SSE
// broker the handlers read from, the loaded configuration, and the rate
// limiter used by the rate-limiting middleware.
type Server struct {
router *gin.Engine
broker *sse.Broker // source of metrics, history, alerts and process details for the handlers
cfg *config.Config
rateLimiter *RateLimiter // shared by every rate-limited route
}
// RateLimiter is a per-key sliding-window rate limiter. For every key (the
// callers in this file use the client IP) it remembers the timestamps of the
// requests it admitted and admits a new one only while fewer than limit of
// them fall inside the trailing window.
//
// A RateLimiter is safe for concurrent use. Create it with NewRateLimiter;
// the zero value has a nil map and must not be used.
type RateLimiter struct {
	requests map[string][]time.Time // admitted request times per key, oldest first
	mu       sync.Mutex
	limit    int
	window   time.Duration
	// lastSweep is when idle keys were last purged from requests.
	lastSweep time.Time
}

// NewRateLimiter returns a limiter that admits at most limit requests per key
// within any trailing window.
func NewRateLimiter(limit int, window time.Duration) *RateLimiter {
	return &RateLimiter{
		requests:  make(map[string][]time.Time),
		limit:     limit,
		window:    window,
		lastSweep: time.Now(),
	}
}

// Allow reports whether a request from ip may proceed right now. An admitted
// request is recorded and counts against the limit for the next window; a
// rejected request is not recorded.
func (r *RateLimiter) Allow(ip string) bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	now := time.Now()
	cutoff := now.Add(-r.window)

	// Keys are otherwise only touched when their own client calls again, so
	// without a periodic sweep the map would keep one entry for every client
	// ever seen. Sweeping at most once per window keeps the amortized cost low.
	if now.Sub(r.lastSweep) >= r.window {
		r.sweep(cutoff)
		r.lastSweep = now
	}

	// Drop expired timestamps in place, reusing the existing backing array.
	stamps := r.requests[ip]
	recent := stamps[:0]
	for _, t := range stamps {
		if t.After(cutoff) {
			recent = append(recent, t)
		}
	}

	if len(recent) >= r.limit {
		if len(recent) == 0 {
			// Only reachable with a non-positive limit; do not keep an empty entry.
			delete(r.requests, ip)
		} else {
			r.requests[ip] = recent
		}
		return false
	}

	r.requests[ip] = append(recent, now)
	return true
}

// sweep removes every key whose newest timestamp is not after cutoff.
// The caller must hold r.mu.
func (r *RateLimiter) sweep(cutoff time.Time) {
	for key, stamps := range r.requests {
		// Timestamps are appended in chronological order, so the last one is the newest.
		if n := len(stamps); n == 0 || !stamps[n-1].After(cutoff) {
			delete(r.requests, key)
		}
	}
}
// NewServer assembles the API server around the given configuration and SSE
// broker. It switches Gin to release mode, builds a router with panic
// recovery and a CORS policy that admits any origin without credentials,
// attaches a limiter of 100 requests per minute, and registers every route.
func NewServer(cfg *config.Config, broker *sse.Broker) *Server {
	gin.SetMode(gin.ReleaseMode)

	corsPolicy := cors.Config{
		AllowOrigins:     []string{"*"},
		AllowMethods:     []string{"GET", "POST", "OPTIONS"},
		AllowHeaders:     []string{"Origin", "Content-Type", "Accept", "Authorization"},
		ExposeHeaders:    []string{"Content-Length"},
		AllowCredentials: false,
		MaxAge:           12 * time.Hour,
	}

	engine := gin.New()
	engine.Use(gin.Recovery())
	engine.Use(cors.New(corsPolicy))

	srv := &Server{
		router:      engine,
		broker:      broker,
		cfg:         cfg,
		rateLimiter: NewRateLimiter(100, time.Minute),
	}
	srv.setupRoutes()

	return srv
}
// setupRoutes registers every HTTP route on the router:
//
//   - /health        unauthenticated and not rate limited
//   - /api/v1/...    rate limited, plus basic auth when cfg.AuthEnabled is set
//   - /metrics       Prometheus exposition, rate limited but never authenticated
func (s *Server) setupRoutes() {
	s.router.GET("/health", s.healthHandler)

	// The rate limiter runs first so rejected requests never reach the auth check.
	api := s.router.Group("/api/v1", s.rateLimitMiddleware())
	if s.cfg.AuthEnabled {
		api.Use(s.basicAuthMiddleware())
	}

	// Metrics, streaming and refresh settings.
	api.GET("/metrics", s.metricsHandler)
	api.GET("/stream", s.streamHandler)
	api.GET("/history", s.historyHandler)
	api.POST("/settings/refresh", s.setRefreshHandler)
	api.GET("/settings/refresh", s.getRefreshHandler)

	// Alerts.
	api.GET("/alerts", s.getAlertsHandler)
	api.GET("/alerts/config", s.getAlertConfigHandler)
	api.POST("/alerts/config", s.setAlertConfigHandler)
	api.POST("/alerts/:id/acknowledge", s.acknowledgeAlertHandler)

	// Processes.
	api.GET("/processes/:pid", s.getProcessDetailHandler)
	api.POST("/processes/:pid/signal", s.sendProcessSignalHandler)

	// Export.
	api.GET("/export/metrics", s.exportMetricsHandler)

	// Prometheus scrape endpoint lives outside the versioned API group.
	s.router.GET("/metrics", s.rateLimitMiddleware(), s.prometheusHandler)
}
// rateLimitMiddleware returns a Gin middleware that consults the server's
// rate limiter with the client IP. Requests over the limit are answered with
// 429 and a JSON error body, and the handler chain is aborted.
func (s *Server) rateLimitMiddleware() gin.HandlerFunc {
	return func(c *gin.Context) {
		if s.rateLimiter.Allow(c.ClientIP()) {
			c.Next()
			return
		}

		c.JSON(http.StatusTooManyRequests, gin.H{"error": "rate limit exceeded"})
		c.Abort()
	}
}
// basicAuthMiddleware returns Gin's HTTP basic-auth middleware configured
// with the single user/password pair taken from the server configuration.
func (s *Server) basicAuthMiddleware() gin.HandlerFunc {
	accounts := gin.Accounts{s.cfg.AuthUser: s.cfg.AuthPass}
	return gin.BasicAuth(accounts)
}
// Run serves plain HTTP on the configured port, on all interfaces, and
// returns whatever error the router's Run reports.
func (s *Server) Run() error {
	addr := ":" + s.cfg.Port
	return s.router.Run(addr)
}
// RunTLS serves HTTPS on the configured port, on all interfaces, using the
// given certificate and key files, and returns whatever error the router's
// RunTLS reports.
func (s *Server) RunTLS(certFile, keyFile string) error {
	addr := ":" + s.cfg.Port
	return s.router.RunTLS(addr, certFile, keyFile)
}
// healthHandler answers liveness probes with 200 and {"status":"ok"}.
func (s *Server) healthHandler(c *gin.Context) {
	body := gin.H{"status": "ok"}
	c.JSON(http.StatusOK, body)
}
// metricsHandler responds with the result of the broker's CollectAll as JSON.
func (s *Server) metricsHandler(c *gin.Context) {
	c.JSON(http.StatusOK, s.broker.CollectAll())
}
// historyHandler responds with everything held by the broker's History as JSON.
func (s *Server) historyHandler(c *gin.Context) {
	c.JSON(http.StatusOK, s.broker.History.GetAll())
}
// streamHandler serves metrics as a Server-Sent Events stream.
//
// It registers a buffered channel with the broker, immediately sends one
// snapshot from CollectAll, and then forwards every payload received on the
// channel as a "data:" event. The handler returns, and the channel is
// unregistered, when the request context is done, when the channel is
// closed, or when writing to the client fails.
func (s *Server) streamHandler(c *gin.Context) {
	// SSE response headers; X-Accel-Buffering disables buffering in nginx-style proxies.
	header := c.Writer.Header()
	header.Set("Content-Type", "text/event-stream")
	header.Set("Cache-Control", "no-cache")
	header.Set("Connection", "keep-alive")
	header.Set("X-Accel-Buffering", "no")
	header.Set("Access-Control-Allow-Origin", "*")

	clientChan := make(chan []byte, 10)
	s.broker.Register(clientChan)
	defer s.broker.Unregister(clientChan)

	// Send an initial snapshot right away. A marshalling failure is not fatal:
	// the snapshot is skipped and the stream carries on with broker payloads.
	if initialJSON, err := json.Marshal(s.broker.CollectAll()); err == nil {
		if _, err := fmt.Fprintf(c.Writer, "data: %s\n\n", initialJSON); err != nil {
			return
		}
		c.Writer.Flush()
	}

	done := c.Request.Context().Done()
	for {
		select {
		case <-done:
			return
		case data, ok := <-clientChan:
			if !ok {
				// A closed channel would otherwise yield nil forever and make
				// this loop spin while writing empty events.
				return
			}
			if _, err := fmt.Fprintf(c.Writer, "data: %s\n\n", data); err != nil {
				// The client can no longer be written to; stop streaming.
				return
			}
			c.Writer.Flush()
		}
	}
}
// RefreshRequest is the JSON body accepted by setRefreshHandler.
type RefreshRequest struct {
Interval int `json:"interval"` // seconds; setRefreshHandler accepts 1 through 60
}
// setRefreshHandler updates the broker's interval from a JSON RefreshRequest.
// It replies 400 when the body cannot be bound or the interval lies outside
// 1..60 seconds, and otherwise echoes the accepted interval with 200.
func (s *Server) setRefreshHandler(c *gin.Context) {
	var body RefreshRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request"})
		return
	}

	seconds := body.Interval
	if inRange := seconds >= 1 && seconds <= 60; !inRange {
		c.JSON(http.StatusBadRequest, gin.H{"error": "interval must be between 1 and 60 seconds"})
		return
	}

	s.broker.SetInterval(time.Duration(seconds) * time.Second)
	c.JSON(http.StatusOK, gin.H{"interval": seconds})
}
// getRefreshHandler reports the broker's current interval, truncated to
// whole seconds, as {"interval": n}.
func (s *Server) getRefreshHandler(c *gin.Context) {
	seconds := int(s.broker.GetInterval().Seconds())
	c.JSON(http.StatusOK, gin.H{"interval": seconds})
}
// promLabelEscaper escapes a label value for the Prometheus text exposition
// format, in which backslash, double quote and line feed must be written as
// \\, \" and \n respectively.
var promLabelEscaper = strings.NewReplacer(`\`, `\\`, `"`, `\"`, "\n", `\n`)

// prometheusHandler renders a snapshot from the broker's CollectAll in the
// Prometheus text exposition format and writes it as text/plain.
//
// GPU, Docker and systemd sections are emitted only when the corresponding
// collector reports Available. Label values that come from the system
// (sensor names and labels, devices, mount points, interface names) are
// escaped so that unusual characters cannot break the exposition.
func (s *Server) prometheusHandler(c *gin.Context) {
	metrics := s.broker.CollectAll()
	esc := promLabelEscaper.Replace

	var sb strings.Builder

	// CPU metrics
	sb.WriteString("# HELP sysmon_cpu_usage_percent CPU usage percentage\n")
	sb.WriteString("# TYPE sysmon_cpu_usage_percent gauge\n")
	fmt.Fprintf(&sb, "sysmon_cpu_usage_percent{type=\"total\"} %.2f\n", metrics.CPU.TotalUsage)
	for _, core := range metrics.CPU.Cores {
		fmt.Fprintf(&sb, "sysmon_cpu_usage_percent{type=\"core\",core=\"%d\"} %.2f\n", core.ID, core.Usage)
	}

	// Memory metrics
	sb.WriteString("# HELP sysmon_memory_bytes Memory in bytes\n")
	sb.WriteString("# TYPE sysmon_memory_bytes gauge\n")
	fmt.Fprintf(&sb, "sysmon_memory_bytes{type=\"total\"} %d\n", metrics.Memory.Total)
	fmt.Fprintf(&sb, "sysmon_memory_bytes{type=\"used\"} %d\n", metrics.Memory.Used)
	fmt.Fprintf(&sb, "sysmon_memory_bytes{type=\"available\"} %d\n", metrics.Memory.Available)
	fmt.Fprintf(&sb, "sysmon_memory_bytes{type=\"cached\"} %d\n", metrics.Memory.Cached)

	// GPU metrics
	if metrics.GPU.Available {
		sb.WriteString("# HELP sysmon_gpu_usage_percent GPU usage percentage\n")
		sb.WriteString("# TYPE sysmon_gpu_usage_percent gauge\n")
		fmt.Fprintf(&sb, "sysmon_gpu_usage_percent %d\n", metrics.GPU.Utilization)

		sb.WriteString("# HELP sysmon_gpu_memory_bytes GPU memory in bytes\n")
		sb.WriteString("# TYPE sysmon_gpu_memory_bytes gauge\n")
		fmt.Fprintf(&sb, "sysmon_gpu_memory_bytes{type=\"used\"} %d\n", metrics.GPU.VRAMUsed)
		fmt.Fprintf(&sb, "sysmon_gpu_memory_bytes{type=\"total\"} %d\n", metrics.GPU.VRAMTotal)

		sb.WriteString("# HELP sysmon_gpu_temperature_celsius GPU temperature\n")
		sb.WriteString("# TYPE sysmon_gpu_temperature_celsius gauge\n")
		fmt.Fprintf(&sb, "sysmon_gpu_temperature_celsius %.1f\n", metrics.GPU.Temperature)

		sb.WriteString("# HELP sysmon_gpu_power_watts GPU power consumption\n")
		sb.WriteString("# TYPE sysmon_gpu_power_watts gauge\n")
		fmt.Fprintf(&sb, "sysmon_gpu_power_watts %.1f\n", metrics.GPU.PowerWatts)
	}

	// Temperature metrics
	sb.WriteString("# HELP sysmon_temperature_celsius Temperature sensor readings\n")
	sb.WriteString("# TYPE sysmon_temperature_celsius gauge\n")
	for _, sensor := range metrics.Temperature.Sensors {
		label := sensor.Label
		if label == "" {
			label = "default"
		}
		fmt.Fprintf(&sb, "sysmon_temperature_celsius{sensor=\"%s\",label=\"%s\"} %.1f\n",
			esc(sensor.Name), esc(label), sensor.Temperature)
	}

	// Disk metrics
	sb.WriteString("# HELP sysmon_disk_bytes Disk space in bytes\n")
	sb.WriteString("# TYPE sysmon_disk_bytes gauge\n")
	for _, mount := range metrics.Disk.Mounts {
		device, mountPoint := esc(mount.Device), esc(mount.MountPoint)
		fmt.Fprintf(&sb, "sysmon_disk_bytes{device=\"%s\",mount=\"%s\",type=\"total\"} %d\n",
			device, mountPoint, mount.Total)
		fmt.Fprintf(&sb, "sysmon_disk_bytes{device=\"%s\",mount=\"%s\",type=\"used\"} %d\n",
			device, mountPoint, mount.Used)
	}

	// Network metrics
	sb.WriteString("# HELP sysmon_network_bytes Network traffic in bytes\n")
	sb.WriteString("# TYPE sysmon_network_bytes counter\n")
	for _, iface := range metrics.Network.Interfaces {
		name := esc(iface.Name)
		fmt.Fprintf(&sb, "sysmon_network_bytes{interface=\"%s\",direction=\"rx\"} %d\n", name, iface.RxBytes)
		fmt.Fprintf(&sb, "sysmon_network_bytes{interface=\"%s\",direction=\"tx\"} %d\n", name, iface.TxBytes)
	}

	// Process count
	sb.WriteString("# HELP sysmon_process_count Number of processes\n")
	sb.WriteString("# TYPE sysmon_process_count gauge\n")
	fmt.Fprintf(&sb, "sysmon_process_count %d\n", metrics.Processes.Total)

	// Docker metrics
	if metrics.Docker.Available {
		sb.WriteString("# HELP sysmon_docker_containers Docker container counts\n")
		sb.WriteString("# TYPE sysmon_docker_containers gauge\n")
		fmt.Fprintf(&sb, "sysmon_docker_containers{state=\"total\"} %d\n", metrics.Docker.Total)
		fmt.Fprintf(&sb, "sysmon_docker_containers{state=\"running\"} %d\n", metrics.Docker.Running)
	}

	// Systemd metrics
	if metrics.Systemd.Available {
		sb.WriteString("# HELP sysmon_systemd_services Systemd service counts\n")
		sb.WriteString("# TYPE sysmon_systemd_services gauge\n")
		fmt.Fprintf(&sb, "sysmon_systemd_services{state=\"total\"} %d\n", metrics.Systemd.Total)
		fmt.Fprintf(&sb, "sysmon_systemd_services{state=\"active\"} %d\n", metrics.Systemd.Active)
		fmt.Fprintf(&sb, "sysmon_systemd_services{state=\"failed\"} %d\n", metrics.Systemd.Failed)
	}

	c.Data(http.StatusOK, "text/plain; charset=utf-8", []byte(sb.String()))
}
// ListenAddr returns the listen address built from the configured port, in
// the ":port" form (no host part).
func (s *Server) ListenAddr() string {
	return ":" + s.cfg.Port
}
// Router exposes the Gin engine backing this server so that callers can
// register additional routes on it.
func (s *Server) Router() *gin.Engine {
	return s.router
}
// Alert handlers
// getAlertsHandler responds with the active alerts, the alert history and
// the current alert configuration in a single AlertsResponse.
func (s *Server) getAlertsHandler(c *gin.Context) {
	var resp models.AlertsResponse
	resp.Active = s.broker.Alerts.GetActiveAlerts()
	resp.History = s.broker.Alerts.GetAlertHistory()
	resp.Config = s.broker.Alerts.GetConfig()

	c.JSON(http.StatusOK, resp)
}
// getAlertConfigHandler responds with the current alert configuration as JSON.
func (s *Server) getAlertConfigHandler(c *gin.Context) {
	c.JSON(http.StatusOK, s.broker.Alerts.GetConfig())
}
// setAlertConfigHandler replaces the alert configuration with the JSON body
// of the request. A body that cannot be bound yields 400; otherwise the
// configuration is stored and {"status":"ok"} is returned.
func (s *Server) setAlertConfigHandler(c *gin.Context) {
	var alertCfg models.AlertConfig
	if bindErr := c.ShouldBindJSON(&alertCfg); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid config format"})
		return
	}

	s.broker.Alerts.SetConfig(alertCfg)
	c.JSON(http.StatusOK, gin.H{"status": "ok"})
}
// acknowledgeAlertHandler acknowledges the alert named by the :id path
// parameter. It replies 404 when AcknowledgeAlert reports false, and 200
// with {"status":"acknowledged"} otherwise.
func (s *Server) acknowledgeAlertHandler(c *gin.Context) {
	id := c.Param("id")

	if acknowledged := s.broker.Alerts.AcknowledgeAlert(id); !acknowledged {
		c.JSON(http.StatusNotFound, gin.H{"error": "alert not found"})
		return
	}

	c.JSON(http.StatusOK, gin.H{"status": "acknowledged"})
}
// getProcessDetailHandler returns the details of the process named by the
// :pid path parameter. A non-numeric pid yields 400; any error from the
// process collector is reported as 404.
func (s *Server) getProcessDetailHandler(c *gin.Context) {
	pid, parseErr := strconv.Atoi(c.Param("pid"))
	if parseErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid pid"})
		return
	}

	info, lookupErr := s.broker.ProcessCollector.GetProcessDetail(pid)
	if lookupErr != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "process not found"})
		return
	}

	c.JSON(http.StatusOK, info)
}
// SignalRequest is the JSON body read by sendProcessSignalHandler.
type SignalRequest struct {
Signal int `json:"signal"` // signal number; the handler only accepts 1 through 31
}
// sendProcessSignalHandler sends a signal to the process named by the :pid
// path parameter.
//
// The optional JSON body is a SignalRequest. When the body is absent or
// omits "signal", SIGTERM is sent. Responses:
//
//   - 400 for a pid that is not a positive integer, a malformed body, or a
//     signal number outside 1..31
//   - 404 when os.FindProcess fails
//   - 500 with the error text when delivering the signal fails
//   - 200 with the pid and signal that were used
func (s *Server) sendProcessSignalHandler(c *gin.Context) {
	pid, err := strconv.Atoi(c.Param("pid"))
	// Zero and negative values are not process IDs. Negative values passed to
	// kill address a whole process group, so they must never get through.
	if err != nil || pid < 1 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid pid"})
		return
	}

	// Default to SIGTERM; a decoded body only overrides it when it actually
	// carries a "signal" field.
	req := SignalRequest{Signal: int(syscall.SIGTERM)}
	if c.Request.ContentLength != 0 {
		// A body was sent, so it has to be valid. Falling back to SIGTERM on
		// a parse error would signal the process on a garbage request.
		if err := c.ShouldBindJSON(&req); err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request"})
			return
		}
	}

	// Validate signal (common signals: 9=SIGKILL, 15=SIGTERM, 2=SIGINT)
	if req.Signal < 1 || req.Signal > 31 {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid signal number"})
		return
	}

	// NOTE(review): on Unix os.FindProcess is documented to succeed for any
	// pid, so a missing process is normally reported by Signal below instead.
	process, err := os.FindProcess(pid)
	if err != nil {
		c.JSON(http.StatusNotFound, gin.H{"error": "process not found"})
		return
	}

	if err := process.Signal(syscall.Signal(req.Signal)); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{"status": "signal sent", "pid": pid, "signal": req.Signal})
}
// exportMetricsHandler offers the current metrics snapshot as a file
// download. The "format" query parameter selects the encoding: "csv"
// produces metrics.csv, anything else (default "json") produces metrics.json.
func (s *Server) exportMetricsHandler(c *gin.Context) {
	format := c.DefaultQuery("format", "json")
	snapshot := s.broker.CollectAll()

	if format == "csv" {
		c.Header("Content-Type", "text/csv")
		c.Header("Content-Disposition", "attachment; filename=metrics.csv")
		c.String(http.StatusOK, metricsToCSV(snapshot))
		return
	}

	c.Header("Content-Disposition", "attachment; filename=metrics.json")
	c.JSON(http.StatusOK, snapshot)
}
// csvField returns s ready to be placed in a CSV record. A value containing
// a comma, double quote, carriage return or line feed is wrapped in double
// quotes with embedded quotes doubled (RFC 4180); any other value is
// returned unchanged.
func csvField(s string) string {
	if !strings.ContainsAny(s, ",\"\r\n") {
		return s
	}
	return `"` + strings.ReplaceAll(s, `"`, `""`) + `"`
}

// metricsToCSV renders a metrics snapshot as text made of several small CSV
// tables. Each table is introduced by a "# ..." title line followed by its
// own header row; tables are separated by a blank line. The GPU table is
// included only when m.GPU.Available is set.
func metricsToCSV(m models.AllMetrics) string {
	var sb strings.Builder

	// CPU section
	sb.WriteString("# CPU Metrics\n")
	sb.WriteString("metric,value\n")
	fmt.Fprintf(&sb, "cpu_total_usage,%.2f\n", m.CPU.TotalUsage)
	fmt.Fprintf(&sb, "cpu_load_1m,%.2f\n", m.CPU.LoadAverage.Load1)
	fmt.Fprintf(&sb, "cpu_load_5m,%.2f\n", m.CPU.LoadAverage.Load5)
	fmt.Fprintf(&sb, "cpu_load_15m,%.2f\n", m.CPU.LoadAverage.Load15)

	// Per-core
	sb.WriteString("\n# CPU Cores\n")
	sb.WriteString("core,usage,frequency_mhz\n")
	for _, core := range m.CPU.Cores {
		fmt.Fprintf(&sb, "%d,%.2f,%d\n", core.ID, core.Usage, core.Frequency)
	}

	// Memory section
	sb.WriteString("\n# Memory Metrics\n")
	sb.WriteString("metric,bytes\n")
	fmt.Fprintf(&sb, "memory_total,%d\n", m.Memory.Total)
	fmt.Fprintf(&sb, "memory_used,%d\n", m.Memory.Used)
	fmt.Fprintf(&sb, "memory_available,%d\n", m.Memory.Available)
	fmt.Fprintf(&sb, "memory_cached,%d\n", m.Memory.Cached)
	fmt.Fprintf(&sb, "swap_total,%d\n", m.Memory.SwapTotal)
	fmt.Fprintf(&sb, "swap_used,%d\n", m.Memory.SwapUsed)

	// GPU section
	if m.GPU.Available {
		sb.WriteString("\n# GPU Metrics\n")
		sb.WriteString("metric,value\n")
		fmt.Fprintf(&sb, "gpu_utilization,%d\n", m.GPU.Utilization)
		fmt.Fprintf(&sb, "gpu_vram_used,%d\n", m.GPU.VRAMUsed)
		fmt.Fprintf(&sb, "gpu_vram_total,%d\n", m.GPU.VRAMTotal)
		fmt.Fprintf(&sb, "gpu_temperature,%.1f\n", m.GPU.Temperature)
		fmt.Fprintf(&sb, "gpu_power_watts,%.1f\n", m.GPU.PowerWatts)
		fmt.Fprintf(&sb, "gpu_clock_mhz,%d\n", m.GPU.ClockGPU)
		fmt.Fprintf(&sb, "gpu_memory_clock_mhz,%d\n", m.GPU.ClockMemory)
	}

	// Temperature section. Sensor names and labels are free text coming from
	// the system, so they are quoted when needed to keep each row at four columns.
	sb.WriteString("\n# Temperature Sensors\n")
	sb.WriteString("sensor,label,temperature_c,critical_c\n")
	for _, sensor := range m.Temperature.Sensors {
		fmt.Fprintf(&sb, "%s,%s,%.1f,%.1f\n",
			csvField(sensor.Name), csvField(sensor.Label), sensor.Temperature, sensor.Critical)
	}

	return sb.String()
}