Server Package (internal/server/): - Registry: Agent registration with approval workflow, persistence - Hub: Connection manager for connected agents, message routing - GRPCServer: mTLS-enabled gRPC server with interceptors - SSEBridge: Bridges agent metrics to browser SSE clients Registry Features: - JSON file-based persistence - Agent lifecycle: pending -> approved -> connected -> offline - Revocation support for certificate-based agent removal - Automatic last-seen tracking Hub Features: - Bidirectional gRPC stream handling - MetricsSubscriber interface for metric distribution - Stale connection detection and cleanup - Broadcast and per-agent command sending gRPC Server: - Unary and stream interceptors for auth - Agent ID extraction from mTLS certificates - Delegation to Hub for business logic Agent Management API: - GET/DELETE /api/v1/agents - List/remove agents - GET /api/v1/agents/pending - Pending approvals - POST /api/v1/agents/pending/:id/approve|reject - GET /api/v1/agents/:id/metrics - Latest agent metrics - GET /api/v1/agents/connected - Connected agents Server Mode Startup: - Full initialization of registry, hub, gRPC, SSE bridge - Graceful shutdown with signal handling - Agent mode now uses the agent package 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
514 lines
12 KiB
Go
514 lines
12 KiB
Go
package server
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
|
|
"tyto/internal/models"
|
|
pb "tyto/internal/proto"
|
|
|
|
"google.golang.org/grpc/codes"
|
|
"google.golang.org/grpc/status"
|
|
)
|
|
|
|
// AgentConnection represents a connected agent.
|
|
type AgentConnection struct {
|
|
ID string
|
|
Stream pb.AgentService_StreamServer
|
|
Info *pb.AgentInfo
|
|
LastMetrics *models.AllMetrics
|
|
LastSeen time.Time
|
|
Connected bool
|
|
SendCh chan *pb.ServerMessage
|
|
cancel context.CancelFunc
|
|
}
|
|
|
|
// MetricsSubscriber receives aggregated metrics from all agents.
|
|
type MetricsSubscriber interface {
|
|
OnAgentMetrics(agentID string, metrics *models.AllMetrics)
|
|
OnAgentConnected(agentID string, info *pb.AgentInfo)
|
|
OnAgentDisconnected(agentID string)
|
|
}
|
|
|
|
// Hub manages agent connections and message routing.
|
|
type Hub struct {
|
|
registry *Registry
|
|
config *HubConfig
|
|
agents map[string]*AgentConnection
|
|
mu sync.RWMutex
|
|
subscribers []MetricsSubscriber
|
|
|
|
// Channels for internal coordination
|
|
registerCh chan *AgentConnection
|
|
unregisterCh chan string
|
|
stopCh chan struct{}
|
|
wg sync.WaitGroup
|
|
}
|
|
|
|
// HubConfig carries the registration policy applied by Hub.Register.
//
// A new agent is approved immediately when AutoApprove is true or
// RequireApproval is false; otherwise it is stored as pending.
type HubConfig struct {
	RequireApproval bool
	AutoApprove     bool
}
|
|
|
|
// NewHub creates a new Hub instance.
|
|
func NewHub(registry *Registry, config *HubConfig) *Hub {
|
|
if config == nil {
|
|
config = &HubConfig{
|
|
RequireApproval: true,
|
|
AutoApprove: false,
|
|
}
|
|
}
|
|
|
|
return &Hub{
|
|
registry: registry,
|
|
config: config,
|
|
agents: make(map[string]*AgentConnection),
|
|
registerCh: make(chan *AgentConnection, 16),
|
|
unregisterCh: make(chan string, 16),
|
|
stopCh: make(chan struct{}),
|
|
}
|
|
}
|
|
|
|
// Subscribe adds a metrics subscriber.
|
|
func (h *Hub) Subscribe(sub MetricsSubscriber) {
|
|
h.mu.Lock()
|
|
defer h.mu.Unlock()
|
|
h.subscribers = append(h.subscribers, sub)
|
|
}
|
|
|
|
// Start begins the hub's event loop.
|
|
func (h *Hub) Start() {
|
|
h.wg.Add(1)
|
|
go h.run()
|
|
}
|
|
|
|
// Stop gracefully shuts down the hub.
|
|
func (h *Hub) Stop() {
|
|
close(h.stopCh)
|
|
h.wg.Wait()
|
|
}
|
|
|
|
func (h *Hub) run() {
|
|
defer h.wg.Done()
|
|
|
|
ticker := time.NewTicker(30 * time.Second)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-h.stopCh:
|
|
h.disconnectAll()
|
|
return
|
|
|
|
case conn := <-h.registerCh:
|
|
h.handleRegister(conn)
|
|
|
|
case agentID := <-h.unregisterCh:
|
|
h.handleUnregister(agentID)
|
|
|
|
case <-ticker.C:
|
|
h.checkStaleConnections()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (h *Hub) handleRegister(conn *AgentConnection) {
|
|
h.mu.Lock()
|
|
defer h.mu.Unlock()
|
|
|
|
// Close existing connection if any
|
|
if existing, ok := h.agents[conn.ID]; ok {
|
|
existing.Connected = false
|
|
if existing.cancel != nil {
|
|
existing.cancel()
|
|
}
|
|
close(existing.SendCh)
|
|
}
|
|
|
|
h.agents[conn.ID] = conn
|
|
log.Printf("Agent registered: %s", conn.ID)
|
|
|
|
// Notify subscribers
|
|
for _, sub := range h.subscribers {
|
|
sub.OnAgentConnected(conn.ID, conn.Info)
|
|
}
|
|
}
|
|
|
|
func (h *Hub) handleUnregister(agentID string) {
|
|
h.mu.Lock()
|
|
defer h.mu.Unlock()
|
|
|
|
conn, ok := h.agents[agentID]
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
conn.Connected = false
|
|
delete(h.agents, agentID)
|
|
log.Printf("Agent unregistered: %s", agentID)
|
|
|
|
// Update registry status
|
|
h.registry.UpdateStatus(agentID, AgentStatusOffline)
|
|
|
|
// Notify subscribers
|
|
for _, sub := range h.subscribers {
|
|
sub.OnAgentDisconnected(agentID)
|
|
}
|
|
}
|
|
|
|
func (h *Hub) checkStaleConnections() {
|
|
h.mu.RLock()
|
|
staleIDs := make([]string, 0)
|
|
for id, conn := range h.agents {
|
|
if time.Since(conn.LastSeen) > 60*time.Second {
|
|
staleIDs = append(staleIDs, id)
|
|
}
|
|
}
|
|
h.mu.RUnlock()
|
|
|
|
for _, id := range staleIDs {
|
|
log.Printf("Removing stale agent: %s", id)
|
|
h.unregisterCh <- id
|
|
}
|
|
}
|
|
|
|
func (h *Hub) disconnectAll() {
|
|
h.mu.Lock()
|
|
defer h.mu.Unlock()
|
|
|
|
for _, conn := range h.agents {
|
|
conn.Connected = false
|
|
if conn.cancel != nil {
|
|
conn.cancel()
|
|
}
|
|
close(conn.SendCh)
|
|
}
|
|
h.agents = make(map[string]*AgentConnection)
|
|
}
|
|
|
|
// Register handles agent registration requests.
|
|
func (h *Hub) Register(ctx context.Context, req *pb.RegisterRequest) (*pb.RegisterResponse, error) {
|
|
agentID := req.AgentId
|
|
info := req.Info
|
|
|
|
// Check if already registered
|
|
existing, exists := h.registry.Get(agentID)
|
|
if exists {
|
|
switch existing.Status {
|
|
case AgentStatusRevoked:
|
|
return &pb.RegisterResponse{
|
|
Status: pb.RegisterStatus_REGISTER_STATUS_REJECTED,
|
|
Message: "agent certificate has been revoked",
|
|
}, nil
|
|
|
|
case AgentStatusApproved, AgentStatusConnected:
|
|
// Update info and return success
|
|
record := &AgentRecord{
|
|
ID: agentID,
|
|
Hostname: info.Hostname,
|
|
OS: info.Os,
|
|
Architecture: info.Architecture,
|
|
Version: info.Version,
|
|
Capabilities: info.Capabilities,
|
|
}
|
|
h.registry.Register(record)
|
|
|
|
return &pb.RegisterResponse{
|
|
Status: pb.RegisterStatus_REGISTER_STATUS_ALREADY_REGISTERED,
|
|
Message: "already registered",
|
|
Config: h.getAgentConfig(),
|
|
}, nil
|
|
|
|
case AgentStatusPending:
|
|
return &pb.RegisterResponse{
|
|
Status: pb.RegisterStatus_REGISTER_STATUS_PENDING_APPROVAL,
|
|
Message: "awaiting approval",
|
|
}, nil
|
|
}
|
|
}
|
|
|
|
// New registration
|
|
record := &AgentRecord{
|
|
ID: agentID,
|
|
Hostname: info.Hostname,
|
|
OS: info.Os,
|
|
Architecture: info.Architecture,
|
|
Version: info.Version,
|
|
Capabilities: info.Capabilities,
|
|
Status: AgentStatusPending,
|
|
}
|
|
|
|
// Auto-approve if configured
|
|
if h.config.AutoApprove || !h.config.RequireApproval {
|
|
record.Status = AgentStatusApproved
|
|
}
|
|
|
|
if err := h.registry.Register(record); err != nil {
|
|
return nil, status.Errorf(codes.Internal, "registration failed: %v", err)
|
|
}
|
|
|
|
if record.Status == AgentStatusApproved {
|
|
return &pb.RegisterResponse{
|
|
Status: pb.RegisterStatus_REGISTER_STATUS_ACCEPTED,
|
|
Message: "registration accepted",
|
|
Config: h.getAgentConfig(),
|
|
}, nil
|
|
}
|
|
|
|
return &pb.RegisterResponse{
|
|
Status: pb.RegisterStatus_REGISTER_STATUS_PENDING_APPROVAL,
|
|
Message: "awaiting approval",
|
|
}, nil
|
|
}
|
|
|
|
// Stream handles the bidirectional streaming RPC.
|
|
func (h *Hub) Stream(stream pb.AgentService_StreamServer) error {
|
|
// Extract agent ID from context (set by auth interceptor)
|
|
agentID, ok := AgentIDFromContext(stream.Context())
|
|
if !ok {
|
|
return status.Error(codes.Unauthenticated, "agent ID not found in context")
|
|
}
|
|
|
|
// Verify agent is approved
|
|
if !h.registry.IsApproved(agentID) {
|
|
return status.Error(codes.PermissionDenied, "agent not approved")
|
|
}
|
|
|
|
// Create connection
|
|
ctx, cancel := context.WithCancel(stream.Context())
|
|
conn := &AgentConnection{
|
|
ID: agentID,
|
|
Stream: stream,
|
|
Connected: true,
|
|
LastSeen: time.Now(),
|
|
SendCh: make(chan *pb.ServerMessage, 16),
|
|
cancel: cancel,
|
|
}
|
|
|
|
// Register connection
|
|
h.registerCh <- conn
|
|
|
|
// Update registry status
|
|
h.registry.UpdateStatus(agentID, AgentStatusConnected)
|
|
|
|
// Start sender goroutine
|
|
h.wg.Add(1)
|
|
go h.sendLoop(conn)
|
|
|
|
// Receive loop
|
|
err := h.receiveLoop(ctx, conn)
|
|
|
|
// Cleanup
|
|
h.unregisterCh <- agentID
|
|
|
|
return err
|
|
}
|
|
|
|
func (h *Hub) sendLoop(conn *AgentConnection) {
|
|
defer h.wg.Done()
|
|
|
|
for msg := range conn.SendCh {
|
|
if err := conn.Stream.Send(msg); err != nil {
|
|
log.Printf("Send error for agent %s: %v", conn.ID, err)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (h *Hub) receiveLoop(ctx context.Context, conn *AgentConnection) error {
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
}
|
|
|
|
msg, err := conn.Stream.Recv()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
conn.LastSeen = time.Now()
|
|
h.registry.UpdateLastSeen(conn.ID)
|
|
|
|
switch payload := msg.Payload.(type) {
|
|
case *pb.AgentMessage_Metrics:
|
|
h.handleMetrics(conn, payload.Metrics)
|
|
|
|
case *pb.AgentMessage_Heartbeat:
|
|
h.handleHeartbeat(conn, payload.Heartbeat)
|
|
|
|
case *pb.AgentMessage_Info:
|
|
conn.Info = payload.Info
|
|
}
|
|
}
|
|
}
|
|
|
|
func (h *Hub) handleMetrics(conn *AgentConnection, report *pb.MetricsReport) {
|
|
// Deserialize metrics
|
|
var metrics models.AllMetrics
|
|
if err := json.Unmarshal(report.MetricsJson, &metrics); err != nil {
|
|
log.Printf("Failed to unmarshal metrics from %s: %v", conn.ID, err)
|
|
return
|
|
}
|
|
|
|
metrics.Timestamp = time.UnixMilli(report.TimestampMs)
|
|
conn.LastMetrics = &metrics
|
|
|
|
// Notify subscribers
|
|
h.mu.RLock()
|
|
for _, sub := range h.subscribers {
|
|
sub.OnAgentMetrics(conn.ID, &metrics)
|
|
}
|
|
h.mu.RUnlock()
|
|
|
|
// Send acknowledgment
|
|
conn.SendCh <- &pb.ServerMessage{
|
|
Payload: &pb.ServerMessage_Ack{
|
|
Ack: &pb.Ack{Success: true},
|
|
},
|
|
}
|
|
}
|
|
|
|
func (h *Hub) handleHeartbeat(conn *AgentConnection, hb *pb.HeartbeatRequest) {
|
|
// Just update last seen (already done in receive loop)
|
|
log.Printf("Heartbeat from %s (uptime: %ds)", conn.ID, hb.UptimeSeconds)
|
|
}
|
|
|
|
// Heartbeat handles simple heartbeat RPCs.
|
|
func (h *Hub) Heartbeat(ctx context.Context, req *pb.HeartbeatRequest) (*pb.HeartbeatResponse, error) {
|
|
h.registry.UpdateLastSeen(req.AgentId)
|
|
|
|
return &pb.HeartbeatResponse{
|
|
ServerTimeMs: time.Now().UnixMilli(),
|
|
ConfigChanged: false,
|
|
}, nil
|
|
}
|
|
|
|
// SendCommand sends a command to a specific agent.
|
|
func (h *Hub) SendCommand(agentID string, cmd *pb.Command) error {
|
|
h.mu.RLock()
|
|
conn, ok := h.agents[agentID]
|
|
h.mu.RUnlock()
|
|
|
|
if !ok || !conn.Connected {
|
|
return status.Error(codes.NotFound, "agent not connected")
|
|
}
|
|
|
|
select {
|
|
case conn.SendCh <- &pb.ServerMessage{
|
|
Payload: &pb.ServerMessage_Command{Command: cmd},
|
|
}:
|
|
return nil
|
|
default:
|
|
return status.Error(codes.ResourceExhausted, "agent send buffer full")
|
|
}
|
|
}
|
|
|
|
// SendConfigUpdate sends a config update to a specific agent.
|
|
func (h *Hub) SendConfigUpdate(agentID string, config *pb.ConfigUpdate) error {
|
|
h.mu.RLock()
|
|
conn, ok := h.agents[agentID]
|
|
h.mu.RUnlock()
|
|
|
|
if !ok || !conn.Connected {
|
|
return status.Error(codes.NotFound, "agent not connected")
|
|
}
|
|
|
|
select {
|
|
case conn.SendCh <- &pb.ServerMessage{
|
|
Payload: &pb.ServerMessage_Config{Config: config},
|
|
}:
|
|
return nil
|
|
default:
|
|
return status.Error(codes.ResourceExhausted, "agent send buffer full")
|
|
}
|
|
}
|
|
|
|
// BroadcastCommand sends a command to all connected agents.
|
|
func (h *Hub) BroadcastCommand(cmd *pb.Command) {
|
|
h.mu.RLock()
|
|
defer h.mu.RUnlock()
|
|
|
|
msg := &pb.ServerMessage{
|
|
Payload: &pb.ServerMessage_Command{Command: cmd},
|
|
}
|
|
|
|
for _, conn := range h.agents {
|
|
if conn.Connected {
|
|
select {
|
|
case conn.SendCh <- msg:
|
|
default:
|
|
log.Printf("Send buffer full for agent %s", conn.ID)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// GetConnectedAgents returns a list of currently connected agent IDs.
|
|
func (h *Hub) GetConnectedAgents() []string {
|
|
h.mu.RLock()
|
|
defer h.mu.RUnlock()
|
|
|
|
ids := make([]string, 0, len(h.agents))
|
|
for id, conn := range h.agents {
|
|
if conn.Connected {
|
|
ids = append(ids, id)
|
|
}
|
|
}
|
|
return ids
|
|
}
|
|
|
|
// GetAgentMetrics returns the last metrics for an agent.
|
|
func (h *Hub) GetAgentMetrics(agentID string) (*models.AllMetrics, bool) {
|
|
h.mu.RLock()
|
|
defer h.mu.RUnlock()
|
|
|
|
conn, ok := h.agents[agentID]
|
|
if !ok || conn.LastMetrics == nil {
|
|
return nil, false
|
|
}
|
|
return conn.LastMetrics, true
|
|
}
|
|
|
|
// GetAllMetrics returns the last metrics for all connected agents.
|
|
func (h *Hub) GetAllMetrics() map[string]*models.AllMetrics {
|
|
h.mu.RLock()
|
|
defer h.mu.RUnlock()
|
|
|
|
result := make(map[string]*models.AllMetrics, len(h.agents))
|
|
for id, conn := range h.agents {
|
|
if conn.LastMetrics != nil {
|
|
result[id] = conn.LastMetrics
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func (h *Hub) getAgentConfig() *pb.AgentConfig {
|
|
return &pb.AgentConfig{
|
|
CollectionIntervalSeconds: 5,
|
|
EnabledCollectors: []string{"cpu", "memory", "disk", "network", "process", "temperature", "gpu"},
|
|
}
|
|
}
|
|
|
|
// contextKey is the private type used for values this package stores in a
// context.Context, so its keys cannot collide with those of other packages.
type contextKey string

// agentIDKey is the context key under which the agent ID is stored.
const agentIDKey contextKey = "agentID"

// AgentIDFromContext returns the agent ID stored in ctx by ContextWithAgentID.
// The boolean is false, and the ID empty, when ctx carries no agent ID.
func AgentIDFromContext(ctx context.Context) (string, bool) {
	if agentID, found := ctx.Value(agentIDKey).(string); found {
		return agentID, true
	}
	return "", false
}

// ContextWithAgentID returns a copy of ctx that carries agentID, retrievable
// with AgentIDFromContext.
func ContextWithAgentID(ctx context.Context, agentID string) context.Context {
	return context.WithValue(ctx, agentIDKey, agentID)
}
|