Files
tyto/backend/internal/server/hub.go
vikingowl 80f6e788f4 feat: implement server hub for multi-device agent management
Server Package (internal/server/):
- Registry: Agent registration with approval workflow, persistence
- Hub: Connection manager for connected agents, message routing
- GRPCServer: mTLS-enabled gRPC server with interceptors
- SSEBridge: Bridges agent metrics to browser SSE clients

Registry Features:
- JSON file-based persistence
- Agent lifecycle: pending -> approved -> connected -> offline
- Revocation support for certificate-based agent removal
- Automatic last-seen tracking

Hub Features:
- Bidirectional gRPC stream handling
- MetricsSubscriber interface for metric distribution
- Stale connection detection and cleanup
- Broadcast and per-agent command sending

gRPC Server:
- Unary and stream interceptors for auth
- Agent ID extraction from mTLS certificates
- Delegation to Hub for business logic

Agent Management API:
- GET/DELETE /api/v1/agents - List/remove agents
- GET /api/v1/agents/pending - Pending approvals
- POST /api/v1/agents/pending/:id/approve|reject
- GET /api/v1/agents/:id/metrics - Latest agent metrics
- GET /api/v1/agents/connected - Connected agents

Server Mode Startup:
- Full initialization of registry, hub, gRPC, SSE bridge
- Graceful shutdown with signal handling
- Agent mode now uses the agent package

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 07:53:23 +01:00

514 lines
12 KiB
Go

package server
import (
"context"
"encoding/json"
"log"
"sync"
"time"
"tyto/internal/models"
pb "tyto/internal/proto"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// AgentConnection represents a connected agent.
//
// Stream creates one AgentConnection per streaming RPC and hands it to the
// hub's event loop, which stores it in the agents map keyed by ID.
type AgentConnection struct {
// ID is the agent ID that Stream read from the stream context.
ID string
// Stream is the gRPC stream used to receive from and send to the agent.
Stream pb.AgentService_StreamServer
// Info is the AgentInfo most recently sent by the agent over the stream.
// Stream does not populate it, so it is nil until an Info message arrives.
Info *pb.AgentInfo
// LastMetrics is the most recent successfully decoded metrics report, or
// nil if none has been received yet.
LastMetrics *models.AllMetrics
// LastSeen is set when the connection is created and refreshed for every
// message received; the hub's stale check compares it against 60 seconds.
LastSeen time.Time
// Connected is true from creation until the hub replaces, unregisters, or
// shuts down the connection.
Connected bool
// SendCh queues outbound messages for the sender goroutine. The hub closes
// it when the connection is replaced by a newer one or when the hub stops.
SendCh chan *pb.ServerMessage
// cancel cancels the context that Stream derives from the stream context.
cancel context.CancelFunc
}
// MetricsSubscriber receives aggregated metrics from all agents.
//
// The hub invokes every callback while holding its own mutex, so
// implementations should return quickly and must not call back into Hub
// methods that acquire that mutex.
type MetricsSubscriber interface {
// OnAgentMetrics is called for each metrics report that decodes
// successfully, with the reporting agent's ID and the decoded metrics.
OnAgentMetrics(agentID string, metrics *models.AllMetrics)
// OnAgentConnected is called when a connection is registered with the hub.
// info is the connection's Info at that moment, which Stream leaves unset,
// so implementations must tolerate a nil value.
OnAgentConnected(agentID string, info *pb.AgentInfo)
// OnAgentDisconnected is called when a registered connection is removed
// from the hub.
OnAgentDisconnected(agentID string)
}
// Hub manages agent connections and message routing.
//
// A single event-loop goroutine (run) applies registrations, unregistrations,
// and the periodic stale-connection check; the remaining methods read the
// agents map under mu.
type Hub struct {
// registry stores agent records and their status.
registry *Registry
// config holds the approval policy; NewHub guarantees it is non-nil.
config *HubConfig
// agents maps an agent ID to its current connection. Guarded by mu.
agents map[string]*AgentConnection
// mu guards agents and subscribers.
mu sync.RWMutex
// subscribers are notified of agent connect, disconnect, and metrics events.
subscribers []MetricsSubscriber
// Channels for internal coordination
// registerCh carries new connections from Stream to the event loop.
registerCh chan *AgentConnection
// unregisterCh carries IDs of agents whose connection should be removed.
unregisterCh chan string
// stopCh is closed by Stop to end the event loop.
stopCh chan struct{}
// wg tracks the event loop and the per-connection sender goroutines.
wg sync.WaitGroup
}
// HubConfig contains Hub configuration.
//
// Register marks a new agent as approved immediately when AutoApprove is true
// or RequireApproval is false; otherwise the agent is stored as pending.
type HubConfig struct {
// RequireApproval, when false, lets new agents skip the pending state.
RequireApproval bool
// AutoApprove, when true, approves new agents regardless of RequireApproval.
AutoApprove bool
}
// NewHub builds a Hub backed by registry.
//
// A nil config selects the defaults: approval required, no auto-approval.
// The returned hub is idle until Start is called.
func NewHub(registry *Registry, config *HubConfig) *Hub {
	cfg := config
	if cfg == nil {
		cfg = &HubConfig{RequireApproval: true, AutoApprove: false}
	}

	hub := &Hub{
		registry: registry,
		config:   cfg,
		agents:   make(map[string]*AgentConnection),
		stopCh:   make(chan struct{}),
	}
	// Small buffers let stream handlers hand work to the event loop without
	// waiting for it to be idle.
	hub.registerCh = make(chan *AgentConnection, 16)
	hub.unregisterCh = make(chan string, 16)
	return hub
}
// Subscribe registers sub to receive agent connect, disconnect, and metrics
// notifications. Subscribers are appended under the hub's write lock.
func (h *Hub) Subscribe(sub MetricsSubscriber) {
	h.mu.Lock()
	h.subscribers = append(h.subscribers, sub)
	h.mu.Unlock()
}
// Start launches the hub's event loop on its own goroutine. The goroutine is
// tracked by the hub's WaitGroup so that Stop can wait for it to finish.
func (h *Hub) Start() {
	h.wg.Add(1)
	go func() {
		h.run()
	}()
}
// Stop signals the event loop to shut down by closing the stop channel and
// then blocks until every goroutine tracked by the hub's WaitGroup has exited.
// It must be called at most once: closing the stop channel twice panics.
func (h *Hub) Stop() {
	stop := h.stopCh
	close(stop)
	h.wg.Wait()
}
// run is the hub's event loop. It serializes connection registration,
// unregistration, and the periodic stale-connection sweep, and disconnects
// every agent before returning once the stop channel is closed.
func (h *Hub) run() {
	defer h.wg.Done()

	const sweepInterval = 30 * time.Second
	sweep := time.NewTicker(sweepInterval)
	defer sweep.Stop()

	for {
		select {
		case <-sweep.C:
			h.checkStaleConnections()
		case id := <-h.unregisterCh:
			h.handleUnregister(id)
		case c := <-h.registerCh:
			h.handleRegister(c)
		case <-h.stopCh:
			h.disconnectAll()
			return
		}
	}
}
// handleRegister installs conn as the current connection for its agent ID.
//
// If a connection with the same ID is already present it is marked
// disconnected, its context is canceled, and its send channel is closed before
// being replaced. Subscribers are then told that the agent connected. The whole
// operation, including the subscriber callbacks, runs under the write lock.
func (h *Hub) handleRegister(conn *AgentConnection) {
	h.mu.Lock()
	defer h.mu.Unlock()

	id := conn.ID

	// Tear down the connection being superseded, if there is one.
	if prev, found := h.agents[id]; found {
		prev.Connected = false
		if stop := prev.cancel; stop != nil {
			stop()
		}
		close(prev.SendCh)
	}

	h.agents[id] = conn
	log.Printf("Agent registered: %s", id)

	for i := range h.subscribers {
		h.subscribers[i].OnAgentConnected(id, conn.Info)
	}
}
// handleUnregister removes the connection stored for agentID, if any.
//
// The connection is marked disconnected, its context is canceled, the registry
// status is set to offline, and subscribers are notified. Unknown IDs are
// ignored, so calling it twice for the same agent is harmless.
//
// Canceling the context matches what handleRegister and disconnectAll do when
// they drop a connection: it makes the stream's receive loop stop at its next
// context check instead of continuing to serve an agent that the hub no longer
// tracks (for example one removed by the stale-connection sweep).
//
// SendCh is deliberately NOT closed here: the receive side of a removed but
// still-running stream may yet try to queue an acknowledgment, and a send on a
// closed channel would panic.
func (h *Hub) handleUnregister(agentID string) {
	h.mu.Lock()
	defer h.mu.Unlock()

	conn, ok := h.agents[agentID]
	if !ok {
		return
	}

	conn.Connected = false
	if conn.cancel != nil {
		// CancelFunc is idempotent, so this is safe even if the stream
		// handler has already finished.
		conn.cancel()
	}
	delete(h.agents, agentID)
	log.Printf("Agent unregistered: %s", agentID)

	// Update registry status
	h.registry.UpdateStatus(agentID, AgentStatusOffline)

	// Notify subscribers
	for _, sub := range h.subscribers {
		sub.OnAgentDisconnected(agentID)
	}
}
// checkStaleConnections unregisters every agent whose connection has not
// received a message for more than 60 seconds.
//
// It is invoked from the event loop (run) on each ticker tick. Because it
// already executes on the goroutine that drains unregisterCh, it must not send
// to that channel: once the number of stale agents exceeded the channel's free
// buffer space the send would block and nobody would be left to receive,
// deadlocking the hub. The stale IDs are therefore unregistered directly.
func (h *Hub) checkStaleConnections() {
	const staleAfter = 60 * time.Second

	// Collect under the read lock, then release it before unregistering,
	// since handleUnregister takes the write lock itself.
	h.mu.RLock()
	var staleIDs []string
	for id, conn := range h.agents {
		if time.Since(conn.LastSeen) > staleAfter {
			staleIDs = append(staleIDs, id)
		}
	}
	h.mu.RUnlock()

	for _, id := range staleIDs {
		log.Printf("Removing stale agent: %s", id)
		h.handleUnregister(id)
	}
}
// disconnectAll tears down every tracked connection: each one is marked
// disconnected, its context is canceled, and its send channel is closed. The
// agents map is then replaced with an empty one. Runs under the write lock.
func (h *Hub) disconnectAll() {
	h.mu.Lock()
	defer h.mu.Unlock()

	for id := range h.agents {
		c := h.agents[id]
		c.Connected = false
		if stop := c.cancel; stop != nil {
			stop()
		}
		close(c.SendCh)
	}

	h.agents = make(map[string]*AgentConnection)
}
// Register handles agent registration requests.
//
// Outcome by current registry state of req.AgentId:
//   - revoked:              REJECTED
//   - approved / connected: agent info is refreshed, ALREADY_REGISTERED plus config
//   - pending:              PENDING_APPROVAL
//   - unknown (or any other status): stored as a new record, which is approved
//     immediately when AutoApprove is set or RequireApproval is unset (ACCEPTED
//     plus config) and otherwise left pending (PENDING_APPROVAL).
//
// It returns an InvalidArgument error when the request carries no agent info or
// an empty agent ID, and an Internal error when a new record cannot be stored.
//
// NOTE(review): the ID is taken from the request body, not from the
// authenticated stream context — confirm an interceptor ties the two together.
func (h *Hub) Register(ctx context.Context, req *pb.RegisterRequest) (*pb.RegisterResponse, error) {
	// Info is dereferenced below; a request without it would otherwise
	// panic inside the RPC handler.
	if req == nil || req.Info == nil {
		return nil, status.Error(codes.InvalidArgument, "agent info is required")
	}
	agentID := req.AgentId
	if agentID == "" {
		return nil, status.Error(codes.InvalidArgument, "agent ID is required")
	}
	info := req.Info

	// Built once and shared by the refresh and new-registration paths.
	// Status is left unset for the refresh path, as before.
	record := &AgentRecord{
		ID:           agentID,
		Hostname:     info.Hostname,
		OS:           info.Os,
		Architecture: info.Architecture,
		Version:      info.Version,
		Capabilities: info.Capabilities,
	}

	// Check if already registered
	if existing, exists := h.registry.Get(agentID); exists {
		switch existing.Status {
		case AgentStatusRevoked:
			return &pb.RegisterResponse{
				Status:  pb.RegisterStatus_REGISTER_STATUS_REJECTED,
				Message: "agent certificate has been revoked",
			}, nil

		case AgentStatusApproved, AgentStatusConnected:
			// Refreshing the stored info is best-effort: the agent is
			// already approved, so a failure is logged, not returned.
			if err := h.registry.Register(record); err != nil {
				log.Printf("Failed to refresh registration for agent %s: %v", agentID, err)
			}
			return &pb.RegisterResponse{
				Status:  pb.RegisterStatus_REGISTER_STATUS_ALREADY_REGISTERED,
				Message: "already registered",
				Config:  h.getAgentConfig(),
			}, nil

		case AgentStatusPending:
			return &pb.RegisterResponse{
				Status:  pb.RegisterStatus_REGISTER_STATUS_PENDING_APPROVAL,
				Message: "awaiting approval",
			}, nil
		}
		// NOTE(review): any other status (e.g. offline) falls through and is
		// treated as a brand-new registration — confirm this is intended.
	}

	// New registration
	record.Status = AgentStatusPending
	if h.config.AutoApprove || !h.config.RequireApproval {
		record.Status = AgentStatusApproved
	}

	if err := h.registry.Register(record); err != nil {
		return nil, status.Errorf(codes.Internal, "registration failed: %v", err)
	}

	if record.Status == AgentStatusApproved {
		return &pb.RegisterResponse{
			Status:  pb.RegisterStatus_REGISTER_STATUS_ACCEPTED,
			Message: "registration accepted",
			Config:  h.getAgentConfig(),
		}, nil
	}
	return &pb.RegisterResponse{
		Status:  pb.RegisterStatus_REGISTER_STATUS_PENDING_APPROVAL,
		Message: "awaiting approval",
	}, nil
}
// Stream handles the bidirectional streaming RPC.
//
// The agent ID is read from the stream context (set by the auth interceptor).
// The call fails with Unauthenticated when no ID is present, PermissionDenied
// when the registry does not report the agent as approved, and Unavailable when
// the hub is shutting down. Otherwise the connection is handed to the hub's
// event loop, a sender goroutine is started, and the method blocks in the
// receive loop until the stream ends, returning that loop's error.
func (h *Hub) Stream(stream pb.AgentService_StreamServer) error {
	// Extract agent ID from context (set by auth interceptor)
	agentID, ok := AgentIDFromContext(stream.Context())
	if !ok {
		return status.Error(codes.Unauthenticated, "agent ID not found in context")
	}

	// Verify agent is approved
	if !h.registry.IsApproved(agentID) {
		return status.Error(codes.PermissionDenied, "agent not approved")
	}

	// Create connection. The derived context is always released when the
	// handler returns; the hub may also cancel it earlier via conn.cancel.
	ctx, cancel := context.WithCancel(stream.Context())
	defer cancel()

	conn := &AgentConnection{
		ID:        agentID,
		Stream:    stream,
		Connected: true,
		LastSeen:  time.Now(),
		SendCh:    make(chan *pb.ServerMessage, 16),
		cancel:    cancel,
	}

	// Refuse new streams once the hub has been stopped. Checked on its own
	// first because a select with several ready cases picks one at random.
	select {
	case <-h.stopCh:
		return status.Error(codes.Unavailable, "hub is shutting down")
	default:
	}

	// Register connection. After Stop nothing drains registerCh, so the
	// hand-off must be abortable rather than a bare blocking send.
	select {
	case h.registerCh <- conn:
	case <-h.stopCh:
		return status.Error(codes.Unavailable, "hub is shutting down")
	case <-ctx.Done():
		return ctx.Err()
	}

	// Update registry status
	h.registry.UpdateStatus(agentID, AgentStatusConnected)

	// Start sender goroutine
	h.wg.Add(1)
	go h.sendLoop(conn)

	// Receive loop
	err := h.receiveLoop(ctx, conn)

	// Cleanup. If the hub has stopped it has already dropped every
	// connection, and nothing would ever receive this request.
	select {
	case h.unregisterCh <- agentID:
	case <-h.stopCh:
	}
	return err
}
// sendLoop forwards messages queued on conn.SendCh to the agent's stream.
//
// It exits when SendCh is closed, when a send fails (the error is logged), or
// when the stream's context is done. The last condition matters because the hub
// does not close SendCh on an ordinary unregister: without it the goroutine
// would block on the channel forever, and Stop, which waits on the WaitGroup
// this goroutine belongs to, would never return.
func (h *Hub) sendLoop(conn *AgentConnection) {
	defer h.wg.Done()

	// Done once the RPC has finished or the client has gone away; nothing
	// can be delivered on the stream after that point.
	done := conn.Stream.Context().Done()

	for {
		select {
		case <-done:
			return
		case msg, ok := <-conn.SendCh:
			if !ok {
				return
			}
			if err := conn.Stream.Send(msg); err != nil {
				log.Printf("Send error for agent %s: %v", conn.ID, err)
				return
			}
		}
	}
}
// receiveLoop reads messages from the agent's stream until Recv fails or ctx is
// canceled, and returns the corresponding error.
//
// Every message refreshes the connection's LastSeen and the registry's
// last-seen time; metrics and heartbeat payloads are dispatched to their
// handlers, and an info payload replaces conn.Info.
//
// LastSeen and Info are written under the hub's lock because the event loop
// reads them under that lock from another goroutine (the stale-connection
// check and subscriber notification); unsynchronized writes here would be a
// data race.
//
// ctx is only checked between messages: a Recv that is already blocked is not
// interrupted by cancellation.
func (h *Hub) receiveLoop(ctx context.Context, conn *AgentConnection) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		msg, err := conn.Stream.Recv()
		if err != nil {
			return err
		}

		h.mu.Lock()
		conn.LastSeen = time.Now()
		h.mu.Unlock()
		h.registry.UpdateLastSeen(conn.ID)

		switch payload := msg.Payload.(type) {
		case *pb.AgentMessage_Metrics:
			h.handleMetrics(conn, payload.Metrics)
		case *pb.AgentMessage_Heartbeat:
			h.handleHeartbeat(conn, payload.Heartbeat)
		case *pb.AgentMessage_Info:
			h.mu.Lock()
			conn.Info = payload.Info
			h.mu.Unlock()
		}
	}
}
// handleMetrics decodes a metrics report, stores it as the connection's latest
// metrics, notifies subscribers, and queues an acknowledgment for the agent.
//
// A nil report or one whose JSON cannot be decoded is dropped (the decode error
// is logged); no acknowledgment is sent for it.
//
// Synchronization:
//   - LastMetrics is written under the write lock because the metrics getters
//     read it under the read lock from other goroutines.
//   - The acknowledgment is queued under the read lock and only while the
//     connection is still marked connected. The hub closes SendCh under the
//     write lock after clearing Connected, so this ordering rules out a send on
//     a closed channel, which would panic.
//   - The send is non-blocking: blocking while holding the lock could stall the
//     hub, so if the buffer is full the acknowledgment is dropped and logged.
func (h *Hub) handleMetrics(conn *AgentConnection, report *pb.MetricsReport) {
	if report == nil {
		return
	}

	// Deserialize metrics
	var metrics models.AllMetrics
	if err := json.Unmarshal(report.MetricsJson, &metrics); err != nil {
		log.Printf("Failed to unmarshal metrics from %s: %v", conn.ID, err)
		return
	}
	metrics.Timestamp = time.UnixMilli(report.TimestampMs)

	h.mu.Lock()
	conn.LastMetrics = &metrics
	h.mu.Unlock()

	ack := &pb.ServerMessage{
		Payload: &pb.ServerMessage_Ack{
			Ack: &pb.Ack{Success: true},
		},
	}

	h.mu.RLock()
	defer h.mu.RUnlock()

	// Notify subscribers
	for _, sub := range h.subscribers {
		sub.OnAgentMetrics(conn.ID, &metrics)
	}

	// Send acknowledgment
	if !conn.Connected {
		return
	}
	select {
	case conn.SendCh <- ack:
	default:
		log.Printf("Send buffer full for agent %s, dropping ack", conn.ID)
	}
}
// handleHeartbeat logs a heartbeat received over the stream. The last-seen
// bookkeeping has already been done by the receive loop before this is called.
func (h *Hub) handleHeartbeat(conn *AgentConnection, hb *pb.HeartbeatRequest) {
	agentID, uptime := conn.ID, hb.UptimeSeconds
	log.Printf("Heartbeat from %s (uptime: %ds)", agentID, uptime)
}
// Heartbeat serves the unary heartbeat RPC: it refreshes the registry's
// last-seen time for req.AgentId and replies with the server's current time in
// Unix milliseconds. ConfigChanged is always reported as false.
func (h *Hub) Heartbeat(ctx context.Context, req *pb.HeartbeatRequest) (*pb.HeartbeatResponse, error) {
	h.registry.UpdateLastSeen(req.AgentId)

	resp := new(pb.HeartbeatResponse)
	resp.ServerTimeMs = time.Now().UnixMilli()
	resp.ConfigChanged = false
	return resp, nil
}
// SendCommand sends a command to a specific agent.
//
// It returns a NotFound status error when the agent has no live connection and
// a ResourceExhausted status error when the agent's send buffer is full; the
// send never blocks.
//
// The read lock is held across both the Connected check and the channel send.
// The hub clears Connected and closes SendCh under the write lock, so releasing
// the lock in between would race on Connected and could send on a channel that
// has just been closed, which panics.
func (h *Hub) SendCommand(agentID string, cmd *pb.Command) error {
	msg := &pb.ServerMessage{
		Payload: &pb.ServerMessage_Command{Command: cmd},
	}

	h.mu.RLock()
	defer h.mu.RUnlock()

	conn, ok := h.agents[agentID]
	if !ok || !conn.Connected {
		return status.Error(codes.NotFound, "agent not connected")
	}

	select {
	case conn.SendCh <- msg:
		return nil
	default:
		return status.Error(codes.ResourceExhausted, "agent send buffer full")
	}
}
// SendConfigUpdate sends a config update to a specific agent.
//
// It returns a NotFound status error when the agent has no live connection and
// a ResourceExhausted status error when the agent's send buffer is full; the
// send never blocks.
//
// The read lock is held across both the Connected check and the channel send.
// The hub clears Connected and closes SendCh under the write lock, so releasing
// the lock in between would race on Connected and could send on a channel that
// has just been closed, which panics.
func (h *Hub) SendConfigUpdate(agentID string, config *pb.ConfigUpdate) error {
	msg := &pb.ServerMessage{
		Payload: &pb.ServerMessage_Config{Config: config},
	}

	h.mu.RLock()
	defer h.mu.RUnlock()

	conn, ok := h.agents[agentID]
	if !ok || !conn.Connected {
		return status.Error(codes.NotFound, "agent not connected")
	}

	select {
	case conn.SendCh <- msg:
		return nil
	default:
		return status.Error(codes.ResourceExhausted, "agent send buffer full")
	}
}
// BroadcastCommand queues cmd for every agent that is currently marked
// connected. Delivery is best-effort: an agent whose send buffer is full is
// skipped and a log line is written. The same message value is shared by all
// recipients. The read lock is held for the whole broadcast.
func (h *Hub) BroadcastCommand(cmd *pb.Command) {
	broadcast := &pb.ServerMessage{
		Payload: &pb.ServerMessage_Command{Command: cmd},
	}

	h.mu.RLock()
	defer h.mu.RUnlock()

	for _, agent := range h.agents {
		if !agent.Connected {
			continue
		}
		select {
		case agent.SendCh <- broadcast:
		default:
			log.Printf("Send buffer full for agent %s", agent.ID)
		}
	}
}
// GetConnectedAgents returns the IDs of all agents whose connection is marked
// connected. The result is never nil and is in no particular order, since it
// follows map iteration.
func (h *Hub) GetConnectedAgents() []string {
	h.mu.RLock()
	defer h.mu.RUnlock()

	connected := make([]string, 0, len(h.agents))
	for agentID := range h.agents {
		if !h.agents[agentID].Connected {
			continue
		}
		connected = append(connected, agentID)
	}
	return connected
}
// GetAgentMetrics returns the most recent metrics stored for agentID. The
// boolean is false, and the pointer nil, when the agent has no tracked
// connection or has not reported any metrics yet.
func (h *Hub) GetAgentMetrics(agentID string) (*models.AllMetrics, bool) {
	h.mu.RLock()
	defer h.mu.RUnlock()

	if conn, found := h.agents[agentID]; found && conn.LastMetrics != nil {
		return conn.LastMetrics, true
	}
	return nil, false
}
// GetAllMetrics returns a new map from agent ID to that agent's most recent
// metrics. Tracked agents that have not reported metrics yet are omitted.
func (h *Hub) GetAllMetrics() map[string]*models.AllMetrics {
	h.mu.RLock()
	defer h.mu.RUnlock()

	snapshot := make(map[string]*models.AllMetrics, len(h.agents))
	for agentID, c := range h.agents {
		if latest := c.LastMetrics; latest != nil {
			snapshot[agentID] = latest
		}
	}
	return snapshot
}
// getAgentConfig returns the configuration handed to agents on registration:
// a fixed 5-second collection interval and a fixed list of enabled collectors.
// A fresh value is built on every call.
func (h *Hub) getAgentConfig() *pb.AgentConfig {
	collectors := []string{"cpu", "memory", "disk", "network", "process", "temperature", "gpu"}

	cfg := &pb.AgentConfig{CollectionIntervalSeconds: 5}
	cfg.EnabledCollectors = collectors
	return cfg
}
// contextKey is a dedicated type for this package's context keys, so that they
// cannot collide with keys of other types, including plain strings that
// happen to have the same text.
type contextKey string

// agentIDKey is the context key under which the agent ID is stored.
const agentIDKey contextKey = "agentID"

// AgentIDFromContext returns the agent ID stored in ctx. The boolean is false,
// and the ID empty, when ctx carries no agent ID or the stored value is not a
// string.
func AgentIDFromContext(ctx context.Context) (string, bool) {
	stored := ctx.Value(agentIDKey)
	agentID, isString := stored.(string)
	return agentID, isString
}

// ContextWithAgentID returns a copy of ctx that carries agentID, retrievable
// with AgentIDFromContext.
func ContextWithAgentID(ctx context.Context, agentID string) context.Context {
	withID := context.WithValue(ctx, agentIDKey, agentID)
	return withID
}