Files
tyto/backend/internal/collectors/gpu/intel.go
vikingowl a0a947094d feat: add multi-GPU support and operational modes
Multi-GPU Collection System:
- Add modular GPU collector architecture in collectors/gpu/
- Support AMD (amdgpu), NVIDIA (nvidia-smi), and Intel (i915/xe) GPUs
- GPU Manager auto-detects and aggregates all vendor collectors
- Backward-compatible JSON output for existing frontend

Operational Modes:
- Standalone mode (default): single-host monitoring, no database
- Server mode: multi-device with database, auth, agents (WIP)
- Agent mode: lightweight reporter to central server (WIP)
- Mode selection via TYTO_MODE env var or config.yaml

Configuration Updates:
- Add server config (gRPC port, mTLS settings, registration)
- Add agent config (ID, server URL, TLS certificates)
- Add database config (SQLite/PostgreSQL support)
- Support TYTO_* prefixed environment variables

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-28 07:21:50 +01:00

146 lines
3.6 KiB
Go

package gpu
import (
"os"
"path/filepath"
"strings"
)
// IntelCollector collects metrics from Intel GPUs (integrated and discrete).
// It reads the sysfs files of cards bound to the i915 or xe kernel driver.
type IntelCollector struct {
// sysPath is the sysfs root; Detect scans <sysPath>/class/drm beneath it.
sysPath string
// cards holds the GPUs found by the most recent Detect call; Collect
// iterates over it in order.
cards []intelCard
}
// intelCard represents a single Intel GPU as discovered by Detect.
type intelCard struct {
// cardPath is <sysPath>/class/drm/<cardN>/device, i.e. the "device" entry
// below the DRM card node (not the card node itself).
cardPath string
// hwmonPath is the first entry under <cardPath>/hwmon, or empty when the
// hwmon directory is missing or has no entries.
hwmonPath string
// name is the PCI_ID value taken from <cardPath>/uevent; empty if the file
// could not be read or has no PCI_ID line.
name string
driver string // i915 or xe (newer driver)
}
// NewIntelCollector returns an IntelCollector that will look for GPUs under
// the given sysfs root. The card list starts out empty (but non-nil); call
// Detect to populate it.
func NewIntelCollector(sysPath string) *IntelCollector {
	collector := new(IntelCollector)
	collector.sysPath = sysPath
	collector.cards = []intelCard{}
	return collector
}
// Vendor reports the GPU vendor handled by this collector, which is always
// VendorIntel.
func (c *IntelCollector) Vendor() Vendor {
return VendorIntel
}
// Detect rescans <sysPath>/class/drm for cards bound to an Intel kernel driver
// (i915 or xe), replaces the collector's card list with what it finds, and
// returns how many cards were discovered. If the DRM directory cannot be read
// the list is left empty and zero is returned.
func (c *IntelCollector) Detect() int {
	// Drop results from any previous scan while keeping the backing array.
	c.cards = c.cards[:0]

	drmDir := filepath.Join(c.sysPath, "class/drm")
	nodes, err := os.ReadDir(drmDir)
	if err != nil {
		return 0
	}

	for _, node := range nodes {
		nodeName := node.Name()

		// Only entries named "card..." without a dash are GPUs. Dashed names
		// (typically per-connector entries such as card0-HDMI-A-1) and
		// anything not starting with "card" are ignored.
		isCard := strings.HasPrefix(nodeName, "card") && !strings.Contains(nodeName, "-")
		if !isCard {
			continue
		}

		devDir := filepath.Join(drmDir, nodeName, "device")

		// The basename of the "driver" symlink target names the bound driver.
		target, err := os.Readlink(filepath.Join(devDir, "driver"))
		if err != nil {
			continue
		}
		drv := filepath.Base(target)
		switch drv {
		case "i915", "xe":
			// Intel driver; keep this card.
		default:
			continue
		}

		found := intelCard{cardPath: devDir, driver: drv}

		// Remember the first hwmon instance, if the device exposes one.
		hwmonRoot := filepath.Join(devDir, "hwmon")
		if mons, err := os.ReadDir(hwmonRoot); err == nil && len(mons) > 0 {
			found.hwmonPath = filepath.Join(hwmonRoot, mons[0].Name())
		}

		// The card name is the PCI_ID value from uevent. Every line is
		// examined, so if several PCI_ID lines exist the last one wins.
		if raw, err := os.ReadFile(filepath.Join(devDir, "uevent")); err == nil {
			lines := strings.Split(string(raw), "\n")
			for idx := range lines {
				if strings.HasPrefix(lines[idx], "PCI_ID=") {
					found.name = strings.TrimPrefix(lines[idx], "PCI_ID=")
				}
			}
		}

		c.cards = append(c.cards, found)
	}

	return len(c.cards)
}
// Collect gathers a metrics snapshot for every GPU found by the last Detect
// call and returns one GPUInfo per card, indexed in detection order.
//
// Every sysfs read is best-effort: a file that is missing or unreadable simply
// leaves the corresponding GPUInfo field at its zero value. The returned error
// is always nil.
func (c *IntelCollector) Collect() ([]GPUInfo, error) {
	gpus := make([]GPUInfo, 0, len(c.cards))
	for i, card := range c.cards {
		info := GPUInfo{
			Index:  i,
			Name:   card.name,
			Vendor: VendorIntel,
			Driver: card.driver,
		}

		// card.cardPath points at <drm>/<cardN>/device, but i915 publishes its
		// gt_* attributes on the DRM card node itself (<drm>/<cardN>). Prefer
		// the card node when the file exists there and fall back to the
		// device directory otherwise, so any layout that worked before still
		// works.
		cardDir := filepath.Dir(card.cardPath)
		attrPath := func(name string) string {
			candidate := filepath.Join(cardDir, name)
			if _, err := os.Stat(candidate); err == nil {
				return candidate
			}
			return filepath.Join(card.cardPath, name)
		}

		// Current GT frequency doubles as the reported core clock.
		if val, err := readInt(attrPath("gt_cur_freq_mhz")); err == nil {
			info.ClockCore = val
		}
		if val, err := readInt(attrPath("gt_max_freq_mhz")); err == nil {
			// There is no direct busy counter here, so utilization is only a
			// rough estimate: current frequency as a percentage of the max.
			if val > 0 && info.ClockCore > 0 {
				info.Utilization = (info.ClockCore * 100) / val
				// The current frequency may briefly exceed the configured
				// maximum; keep the estimate a valid percentage.
				if info.Utilization > 100 {
					info.Utilization = 100
				}
			}
		}

		// Temperature and power come from hwmon when Detect found one.
		if card.hwmonPath != "" {
			// temp1_input is divided by 1000 (millidegrees to degrees).
			if val, err := readInt(filepath.Join(card.hwmonPath, "temp1_input")); err == nil {
				info.Temperature = float64(val) / 1000.0
			}
			// power1_average is converted from microwatts to watts.
			// NOTE(review): the original note here says Intel exposes an
			// energy counter that would have to be differentiated over time;
			// that is not implemented, so PowerWatts stays zero whenever
			// power1_average is absent.
			if val, err := readInt(filepath.Join(card.hwmonPath, "power1_average")); err == nil {
				info.PowerWatts = float64(val) / 1000000.0
			}
		}

		// Discrete GPUs have local memory (VRAM); integrated GPUs use system
		// RAM, in which case these files are expected to be missing and the
		// memory fields stay zero.
		if val, err := readUint64(attrPath("lmem_total_bytes")); err == nil {
			info.MemoryTotal = val
		}
		if val, err := readUint64(attrPath("lmem_avail_bytes")); err == nil {
			// Guard the unsigned subtraction: an available value larger than
			// the total would otherwise wrap around to a huge number.
			if info.MemoryTotal > 0 && val <= info.MemoryTotal {
				info.MemoryUsed = info.MemoryTotal - val
			}
		}

		gpus = append(gpus, info)
	}
	return gpus, nil
}