feat: M1-M7 gap audit phase 2 — security, TUI, context, router feedback

Gap 6 (M3): 7 new bash security checks (8-14)
- JQ injection, obfuscated flags (Unicode lookalike hyphens),
  /proc/environ access, brace expansion, Unicode whitespace,
  zsh dangerous constructs, comment-quote desync
- Total: 14 checks (was 7)

Gap 7 (M5): Model picker numbered selection
- /model shows numbered sorted list, /model 3 picks by number

Gap 8 (M5): /config set command
- /config set provider.default mistral writes to .gnoma/config.toml
- Whitelisted keys: provider.default, provider.model, permission.mode
- New config/write.go with TOML round-trip via BurntSushi/toml

Gap 9 (M6): Simple token estimator
- EstimateTokens (len/4 heuristic), EstimateMessages (content + overhead)
- PreEstimate on Tracker for proactive compaction triggering

Gap 10 (M7): Router quality feedback from elfs
- Router.Outcome + ReportOutcome (logs for now, M9 bandit uses later)
- Manager tracks armID/taskType per elf via elfMeta map
- Manager.ReportResult called after elf completion in both agent + batch tools
This commit is contained in:
2026-04-04 11:07:08 +02:00
parent 31cba286ac
commit 509c897847
8 changed files with 434 additions and 19 deletions

66
internal/config/write.go Normal file
View File

@@ -0,0 +1,66 @@
package config
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/BurntSushi/toml"
)
// SetProjectConfig writes a single key=value to the project config file (.gnoma/config.toml).
// Only whitelisted keys are supported.
func SetProjectConfig(key, value string) error {
allowed := map[string]bool{
"provider.default": true,
"provider.model": true,
"permission.mode": true,
}
if !allowed[key] {
return fmt.Errorf("unknown config key %q (supported: %s)", key, strings.Join(allowedKeys(), ", "))
}
path := filepath.Join(".gnoma", "config.toml")
// Load existing config or start fresh
var cfg Config
if data, err := os.ReadFile(path); err == nil {
toml.Decode(string(data), &cfg)
}
if cfg.Provider.APIKeys == nil {
cfg.Provider.APIKeys = make(map[string]string)
}
if cfg.Provider.Endpoints == nil {
cfg.Provider.Endpoints = make(map[string]string)
}
// Apply the change
switch key {
case "provider.default":
cfg.Provider.Default = value
case "provider.model":
cfg.Provider.Model = value
case "permission.mode":
cfg.Permission.Mode = value
}
// Ensure directory exists
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return fmt.Errorf("create config dir: %w", err)
}
// Write
f, err := os.Create(path)
if err != nil {
return fmt.Errorf("create config file: %w", err)
}
defer f.Close()
enc := toml.NewEncoder(f)
return enc.Encode(cfg)
}
func allowedKeys() []string {
return []string{"provider.default", "provider.model", "permission.mode"}
}

View File

@@ -118,3 +118,43 @@ func (t *Tracker) State() TokenState {
func (t *Tracker) ShouldCompact() bool {
return t.State() == TokensCritical
}
// PreEstimate adds an estimated token count before the provider reports actual usage.
// Used for proactive compaction triggering before sending a request.
func (t *Tracker) PreEstimate(tokens int64) {
t.current += tokens
}
// EstimateTokens returns a rough token estimate for a text string.
// Heuristic: ~4 characters per token for English text.
func EstimateTokens(text string) int64 {
return int64(len(text)+3) / 4
}
// EstimateMessages returns a rough token estimate for a slice of messages.
func EstimateMessages(msgs []message.Message) int64 {
var total int64
for _, msg := range msgs {
for _, c := range msg.Content {
switch c.Type {
case message.ContentText:
total += EstimateTokens(c.Text)
case message.ContentToolCall:
total += 50 // schema overhead per tool call
if c.ToolCall != nil {
total += EstimateTokens(string(c.ToolCall.Arguments))
}
case message.ContentToolResult:
if c.ToolResult != nil {
total += EstimateTokens(c.ToolResult.Content)
}
case message.ContentThinking:
if c.Thinking != nil {
total += EstimateTokens(c.Thinking.Text)
}
}
}
total += 4 // per-message overhead (role, separators)
}
return total
}

View File

@@ -12,10 +12,17 @@ import (
"somegit.dev/Owlibou/gnoma/internal/tool"
)
// elfMeta tracks routing metadata for quality feedback.
type elfMeta struct {
armID router.ArmID
taskType router.TaskType
}
// Manager spawns, tracks, and manages elfs.
type Manager struct {
mu sync.RWMutex
elfs map[string]Elf
meta map[string]elfMeta // routing metadata per elf ID
router *router.Router
tools *tool.Registry
logger *slog.Logger
@@ -34,6 +41,7 @@ func NewManager(cfg ManagerConfig) *Manager {
}
return &Manager{
elfs: make(map[string]Elf),
meta: make(map[string]elfMeta),
router: cfg.Router,
tools: cfg.Tools,
logger: logger,
@@ -80,12 +88,32 @@ func (m *Manager) Spawn(ctx context.Context, taskType router.TaskType, prompt, s
m.mu.Lock()
m.elfs[elf.ID()] = elf
m.meta[elf.ID()] = elfMeta{armID: arm.ID, taskType: taskType}
m.mu.Unlock()
m.logger.Info("elf spawned", "id", elf.ID(), "arm", arm.ID)
return elf, nil
}
// ReportResult reports an elf's outcome to the router for quality feedback.
func (m *Manager) ReportResult(result Result) {
m.mu.RLock()
meta, ok := m.meta[result.ID]
m.mu.RUnlock()
if !ok {
return
}
m.router.ReportOutcome(router.Outcome{
ArmID: meta.armID,
TaskType: meta.taskType,
Success: result.Status == StatusCompleted,
Tokens: int(result.Usage.TotalTokens()),
Duration: result.Duration,
})
}
// SpawnWithProvider creates an elf using a specific provider (bypasses router).
func (m *Manager) SpawnWithProvider(prov provider.Provider, model, prompt, systemPrompt string, maxTurns int) (Elf, error) {
eng, err := engine.New(engine.Config{

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"log/slog"
"sync"
"time"
"somegit.dev/Owlibou/gnoma/internal/provider"
"somegit.dev/Owlibou/gnoma/internal/stream"
@@ -123,6 +124,27 @@ func (r *Router) RemoveArm(id ArmID) {
delete(r.arms, id)
}
// Outcome records the result of a task execution for quality feedback.
type Outcome struct {
ArmID ArmID
TaskType TaskType
Success bool
Tokens int
Duration time.Duration
}
// ReportOutcome records a task execution result for quality tracking.
// M4: logs only. M9 will use this for bandit learning.
func (r *Router) ReportOutcome(o Outcome) {
r.logger.Debug("outcome reported",
"arm", o.ArmID,
"task", o.TaskType,
"success", o.Success,
"tokens", o.Tokens,
"duration", o.Duration,
)
}
// Arms returns all registered arms.
func (r *Router) Arms() []*Arm {
r.mu.RLock()

View File

@@ -169,6 +169,9 @@ func (t *Tool) Execute(ctx context.Context, args json.RawMessage) (tool.Result,
return tool.Result{Output: "Elf timed out after 5 minutes"}, nil
}
// Report outcome to router for quality feedback
t.manager.ReportResult(result)
// Send done signal — stays in tree until turn completes
doneProgress := elf.Progress{
ElfID: result.ID,

View File

@@ -173,6 +173,9 @@ func (t *BatchTool) Execute(ctx context.Context, args json.RawMessage) (tool.Res
}
}
// Report outcome to router
t.manager.ReportResult(results[idx])
// Send done progress
r := results[idx]
t.sendProgress(elf.Progress{

View File

@@ -10,13 +10,20 @@ import (
type SecurityCheck int
const (
CheckIncomplete SecurityCheck = iota + 1 // fragments, trailing operators
CheckMetacharacters // ; | & $ ` < >
CheckCmdSubstitution // $(), ``, ${}
CheckRedirection // < > >> etc.
CheckDangerousVars // IFS, PATH manipulation
CheckNewlineInjection // embedded newlines
CheckControlChars // ASCII 00-1F (except \n \t)
CheckIncomplete SecurityCheck = iota + 1 // fragments, trailing operators
CheckMetacharacters // ; | & $ ` < >
CheckCmdSubstitution // $(), ``, ${}
CheckRedirection // < > >> etc.
CheckDangerousVars // IFS, PATH manipulation
CheckNewlineInjection // embedded newlines
CheckControlChars // ASCII 00-1F (except \n \t)
CheckJQInjection // jq with shell metacharacters
CheckObfuscatedFlags // Unicode lookalike hyphens
CheckProcEnviron // /proc/*/environ access
CheckBraceExpansion // dangerous {a,b} expansion
CheckUnicodeWhitespace // non-ASCII whitespace
CheckZshDangerous // zsh-specific dangerous constructs
CheckCommentDesync // # inside strings hiding commands
)
// SecurityViolation describes a failed security check.
@@ -61,6 +68,27 @@ func ValidateCommand(cmd string) *SecurityViolation {
if v := checkSensitiveRedirection(cmd); v != nil {
return v
}
if v := checkJQInjection(cmd); v != nil {
return v
}
if v := checkObfuscatedFlags(cmd); v != nil {
return v
}
if v := checkProcEnviron(cmd); v != nil {
return v
}
if v := checkBraceExpansion(cmd); v != nil {
return v
}
if v := checkUnicodeWhitespace(cmd); v != nil {
return v
}
if v := checkZshDangerous(cmd); v != nil {
return v
}
if v := checkCommentQuoteDesync(cmd); v != nil {
return v
}
return nil
}
@@ -238,6 +266,193 @@ func checkSensitiveRedirection(cmd string) *SecurityViolation {
return nil
}
// checkJQInjection detects jq commands with embedded shell metacharacters in the filter.
func checkJQInjection(cmd string) *SecurityViolation {
// Only check commands that invoke jq
if !strings.Contains(cmd, "jq ") && !strings.HasPrefix(cmd, "jq") {
return nil
}
// jq filters with $( or ` indicate shell injection through jq
dangerousInJQ := []string{"$(", "`", "system(", "input|"}
for _, d := range dangerousInJQ {
if strings.Contains(cmd, d) {
return &SecurityViolation{
Check: CheckJQInjection,
Message: fmt.Sprintf("jq command with dangerous pattern: %s", d),
}
}
}
return nil
}
// checkObfuscatedFlags detects Unicode lookalike characters used as hyphens.
// Attackers use en-dash (), em-dash (—), minus sign () instead of ASCII hyphen.
func checkObfuscatedFlags(cmd string) *SecurityViolation {
lookalikes := []rune{
'\u2013', // en-dash
'\u2014', // em-dash —
'\u2212', // minus sign
'\uFE63', // small hyphen-minus ﹣
'\uFF0D', // fullwidth hyphen-minus -
}
for i, r := range cmd {
for _, look := range lookalikes {
if r == look {
return &SecurityViolation{
Check: CheckObfuscatedFlags,
Message: fmt.Sprintf("Unicode lookalike hyphen U+%04X at position %d", r, i),
}
}
}
}
return nil
}
// checkProcEnviron blocks access to /proc/*/environ and /proc/self/mem.
func checkProcEnviron(cmd string) *SecurityViolation {
dangerous := []string{
"/proc/self/environ",
"/proc/self/mem",
"/proc/self/cmdline",
}
lower := strings.ToLower(cmd)
for _, d := range dangerous {
if strings.Contains(lower, d) {
return &SecurityViolation{
Check: CheckProcEnviron,
Message: fmt.Sprintf("access to %s (environment exfiltration)", d),
}
}
}
// Also catch /proc/*/environ with PID
if strings.Contains(lower, "/proc/") && strings.Contains(lower, "/environ") {
return &SecurityViolation{
Check: CheckProcEnviron,
Message: "/proc/PID/environ access (environment exfiltration)",
}
}
return nil
}
// checkBraceExpansion detects dangerous brace expansion patterns.
// {a,b} is used to expand multiple arguments — can bypass argument filters.
func checkBraceExpansion(cmd string) *SecurityViolation {
inSingle := false
inDouble := false
braceDepth := 0
for _, r := range cmd {
if r == '\'' && !inDouble {
inSingle = !inSingle
continue
}
if r == '"' && !inSingle {
inDouble = !inDouble
continue
}
if inSingle || inDouble {
continue
}
if r == '{' {
braceDepth++
}
if r == '}' && braceDepth > 0 {
braceDepth--
}
// Comma inside braces = brace expansion
if r == ',' && braceDepth > 0 {
return &SecurityViolation{
Check: CheckBraceExpansion,
Message: "brace expansion {a,b} (can bypass argument filters)",
}
}
}
return nil
}
// checkUnicodeWhitespace detects non-ASCII whitespace characters that can hide commands.
func checkUnicodeWhitespace(cmd string) *SecurityViolation {
for i, r := range cmd {
if r > 127 && unicode.IsSpace(r) {
return &SecurityViolation{
Check: CheckUnicodeWhitespace,
Message: fmt.Sprintf("non-ASCII whitespace U+%04X at position %d", r, i),
}
}
}
return nil
}
// checkZshDangerous detects zsh-specific dangerous constructs.
func checkZshDangerous(cmd string) *SecurityViolation {
dangerousPatterns := []struct {
pattern string
msg string
}{
{"=(", "zsh process substitution =() (arbitrary execution)"},
{">(", "zsh output process substitution >()"},
{"<(", "zsh input process substitution <()"},
{"zmodload", "zsh module loading (can load arbitrary code)"},
{"sysopen", "zsh sysopen (direct file descriptor access)"},
{"ztcp", "zsh TCP socket access"},
{"zsocket", "zsh socket access"},
}
for _, p := range dangerousPatterns {
if strings.Contains(cmd, p.pattern) {
return &SecurityViolation{
Check: CheckZshDangerous,
Message: p.msg,
}
}
}
return nil
}
// checkCommentQuoteDesync detects # characters that could be interpreted differently
// depending on shell parsing context (e.g., mid-word # in zsh vs bash).
func checkCommentQuoteDesync(cmd string) *SecurityViolation {
inSingle := false
inDouble := false
escaped := false
prevWasSpace := true
for _, r := range cmd {
if escaped {
escaped = false
prevWasSpace = false
continue
}
if r == '\\' && !inSingle {
escaped = true
continue
}
if r == '\'' && !inDouble {
inSingle = !inSingle
prevWasSpace = false
continue
}
if r == '"' && !inSingle {
inDouble = !inDouble
prevWasSpace = false
continue
}
if inSingle || inDouble {
prevWasSpace = false
continue
}
// # at start of word is a comment — legit after whitespace
// # mid-word is suspicious in zsh (history expansion, etc.)
if r == '#' && !prevWasSpace {
return &SecurityViolation{
Check: CheckCommentDesync,
Message: "mid-word # character (comment/history expansion ambiguity)",
}
}
prevWasSpace = unicode.IsSpace(r)
}
return nil
}
// checkDangerousVars blocks attempts to manipulate IFS or PATH.
func checkDangerousVars(cmd string) *SecurityViolation {
upper := strings.ToUpper(cmd)

View File

@@ -6,6 +6,8 @@ import (
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
@@ -14,6 +16,7 @@ import (
"charm.land/glamour/v2"
"charm.land/bubbles/v2/key"
"charm.land/lipgloss/v2"
gnomacfg "somegit.dev/Owlibou/gnoma/internal/config"
"somegit.dev/Owlibou/gnoma/internal/elf"
"somegit.dev/Owlibou/gnoma/internal/engine"
"somegit.dev/Owlibou/gnoma/internal/message"
@@ -377,45 +380,79 @@ func (m Model) handleCommand(cmd string) (tea.Model, tea.Cmd) {
var b strings.Builder
fmt.Fprintf(&b, "current: %s/%s\n", status.Provider, status.Model)
if m.config.Router != nil {
b.WriteString("\nAvailable arms:\n")
for _, arm := range m.config.Router.Arms() {
arms := m.config.Router.Arms()
sort.Slice(arms, func(i, j int) bool {
return string(arms[i].ID) < string(arms[j].ID)
})
b.WriteString("\nAvailable models:\n")
for i, arm := range arms {
marker := " "
if string(arm.ID) == status.Provider+"/"+status.Model {
marker = "→ "
}
caps := ""
var caps []string
if arm.Capabilities.ToolUse {
caps += "tools "
caps = append(caps, "tools")
}
if arm.Capabilities.Thinking {
caps += "thinking "
caps = append(caps, "thinking")
}
if arm.Capabilities.Vision {
caps += "vision "
caps = append(caps, "vision")
}
local := ""
if arm.IsLocal {
local = " (local)"
}
fmt.Fprintf(&b, "%s%s [%s]%s\n", marker, arm.ID, strings.TrimSpace(caps), local)
capStr := ""
if len(caps) > 0 {
capStr = " [" + strings.Join(caps, ", ") + "]"
}
fmt.Fprintf(&b, "%s%d. %s%s%s\n", marker, i+1, arm.ID, capStr, local)
}
}
b.WriteString("\nUsage: /model <model-name>")
b.WriteString("\nUsage: /model <name-or-number>")
m.messages = append(m.messages, chatMessage{role: "system", content: b.String()})
return m, nil
}
if m.config.Engine != nil {
m.config.Engine.SetModel(args)
// Update session status display
modelName := args
// Support numeric selection: /model 3
if n, err := strconv.Atoi(args); err == nil && n >= 1 && m.config.Router != nil {
arms := m.config.Router.Arms()
sort.Slice(arms, func(i, j int) bool {
return string(arms[i].ID) < string(arms[j].ID)
})
if n <= len(arms) {
modelName = arms[n-1].ModelName
}
}
m.config.Engine.SetModel(modelName)
if ls, ok := m.session.(*session.Local); ok {
ls.SetModel(args)
ls.SetModel(modelName)
}
m.messages = append(m.messages, chatMessage{role: "system",
content: fmt.Sprintf("model switched to: %s", args)})
content: fmt.Sprintf("model switched to: %s", modelName)})
}
return m, nil
case "/config":
// /config set <key> <value>
if strings.HasPrefix(args, "set ") {
parts := strings.SplitN(strings.TrimPrefix(args, "set "), " ", 2)
if len(parts) != 2 {
m.messages = append(m.messages, chatMessage{role: "error",
content: "Usage: /config set <key> <value>\nKeys: provider.default, provider.model, permission.mode"})
return m, nil
}
if err := gnomacfg.SetProjectConfig(parts[0], parts[1]); err != nil {
m.messages = append(m.messages, chatMessage{role: "error", content: err.Error()})
} else {
m.messages = append(m.messages, chatMessage{role: "system",
content: fmt.Sprintf("config set: %s = %s (saved to .gnoma/config.toml)", parts[0], parts[1])})
}
return m, nil
}
status := m.session.Status()
var b strings.Builder
b.WriteString("Current configuration:\n")
@@ -430,6 +467,7 @@ func (m Model) handleCommand(cmd string) (tea.Model, tea.Cmd) {
fmt.Fprintf(&b, " git branch: %s\n", m.gitBranch)
}
b.WriteString("\nConfig files: ~/.config/gnoma/config.toml, .gnoma/config.toml")
b.WriteString("\nEdit: /config set <key> <value>")
m.messages = append(m.messages, chatMessage{role: "system", content: b.String()})
return m, nil