fix(ai): per-model cost calc + thinking toggle and token tracking

estimateCost ignored the model name and billed every Gemini call at
hardcoded flash-lite rates ($0.10 / $0.40 per 1M), under-counting Pro
calls by ~12-25x. Switch to priceFor(model) and prefer resp.ModelVersion
so aliases like gemini-pro-latest resolve to their concrete family.
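
priceFor and the geminiPricing table themselves sit outside this diff, so the
following is only a sketch of the assumed shape (a longest-prefix lookup over
per-1M-token rates). The rates and fallback behavior come from the new
estimateCost tests; everything else is illustrative and assumes "strings" is
imported:

    // Sketch only: shape assumed, rates taken from the new estimateCost tests
    // (USD per 1M tokens, input then output). Unmatched names fall through to
    // (0, 0), which estimateCost logs as an unknown model.
    var geminiPricing = map[string][2]float64{
        "gemini-2.5-pro":        {1.25, 10.00},
        "gemini-2.5-flash":      {0.30, 2.50},
        "gemini-2.5-flash-lite": {0.10, 0.40},
        "gemini-3.1-pro":        {2.00, 12.00},
    }

    func priceFor(model string) (inUSDPerM, outUSDPerM float64) {
        best := ""
        for prefix := range geminiPricing {
            if strings.HasPrefix(model, prefix) && len(prefix) > len(best) {
                best = prefix
            }
        }
        if best == "" {
            return 0, 0
        }
        return geminiPricing[best][0], geminiPricing[best][1]
    }

Longest-prefix matching matters because "gemini-2.5-flash" is itself a prefix
of "gemini-2.5-flash-lite", and it is what lets a resolved name like
"gemini-2.5-pro-002" price as its family while "gemini-pro-latest" stays
unknown.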

Capture ThoughtsTokenCount as a separate ThinkingTokens column on
ai_usage (migration 000030) and bill it at the output rate.
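
Worked example, matching the new billing test: at the 2.5-pro rates the tests
pin ($1.25 in / $10.00 out per 1M tokens), a call with 1,000 prompt tokens,
500 candidate tokens, and 200 thought tokens costs
1,000 x 1.25/1M + (500 + 200) x 10.00/1M = $0.00825; the 200 thoughts land in
ThinkingTokens but are priced as output.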

Add a global thinking on/off toggle that mirrors the grounding pattern:
provider holds an in-memory cache (read at startup from settings.Store),
handler keeps it in sync, Chat() applies ThinkingConfig.ThinkingBudget=0
only when disabled. Default true preserves SDK behavior. The grounding and
thinking get/set helpers are folded into shared getBool/setBool to keep
goconst happy.
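
With the helpers shared, the next boolean setting is two one-line wrappers.
A hypothetical sketch (the auto-retry key below is made up, not part of this
change), assuming the Store type and helpers from the settings hunk further
down:

    // Hypothetical: "gemini.auto_retry_enabled" does not exist in this commit;
    // it only shows how a future flag would reuse getBool/setBool.
    const keyAutoRetryEnabled = "gemini.auto_retry_enabled"

    func (s *Store) GetAutoRetryEnabled(ctx context.Context) (bool, error) {
        return s.getBool(ctx, keyAutoRetryEnabled, false)
    }

    func (s *Store) SetAutoRetryEnabled(ctx context.Context, enabled bool, updatedBy uuid.UUID) error {
        return s.setBool(ctx, keyAutoRetryEnabled, enabled, updatedBy)
    }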

Web admin settings: new "Modell-Reasoning" toggle card; usage panel sums
include thinking tokens. The new fields are optional in the web types and
read with `?? 0` defaults, so a brief web-before-backend rollout window
cannot render NaN.
2026-04-28 12:56:04 +02:00
parent 34a3da6e8b
commit ba4dce1f76
13 changed files with 309 additions and 41 deletions

View File

@@ -19,6 +19,7 @@ type AIStatus struct {
APIKeyFingerprint string `json:"api_key_fingerprint,omitempty"`
GroundingEnabled bool `json:"grounding_enabled"`
GroundingQuota int `json:"grounding_quota"`
ThinkingEnabled bool `json:"thinking_enabled"`
Usage UsageSummary `json:"usage"`
}
@@ -55,6 +56,7 @@ func (h *Handler) GetAI(c *gin.Context) {
}
grounding, _ := h.store.GetGroundingEnabled(ctx)
thinking, _ := h.store.GetThinkingEnabled(ctx)
today, _ := h.usageRepo.Today(ctx)
month, _ := h.usageRepo.Month(ctx)
@@ -68,6 +70,7 @@ func (h *Handler) GetAI(c *gin.Context) {
APIKeyFingerprint: fingerprint,
GroundingEnabled: grounding,
GroundingQuota: 1500,
ThinkingEnabled: thinking,
Usage: UsageSummary{
Today: today,
Month: month,
@@ -150,6 +153,24 @@ func (h *Handler) SetGrounding(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"data": gin.H{"grounding_enabled": req.Enabled}})
}
func (h *Handler) SetThinking(c *gin.Context) {
ctx := c.Request.Context()
var req struct {
Enabled bool `json:"enabled"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "enabled is required"})
return
}
userID := callerID(c)
if err := h.store.SetThinkingEnabled(ctx, req.Enabled, userID); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save thinking setting"})
return
}
h.provider.SetThinkingEnabled(req.Enabled)
c.JSON(http.StatusOK, gin.H{"data": gin.H{"thinking_enabled": req.Enabled}})
}
func (h *Handler) GetUsage(c *gin.Context) {
ctx := c.Request.Context()
limit := 50

View File

@@ -8,5 +8,6 @@ func RegisterRoutes(rg *gin.RouterGroup, h *Handler, requireAuth, requireAdmin g
admin.POST("/settings/ai/model", h.SetModel)
admin.POST("/settings/ai/key", h.SetAPIKey)
admin.POST("/settings/ai/grounding", h.SetGrounding)
admin.POST("/settings/ai/thinking", h.SetThinking)
admin.GET("/settings/ai/usage", h.GetUsage)
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"strconv"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
@@ -16,6 +17,7 @@ const (
keyAPIKey = "gemini.api_key"
keyModel = "gemini.model"
keyGroundingEnabled = "gemini.grounding_enabled"
keyThinkingEnabled = "gemini.thinking_enabled"
)
// Store persists AI provider configuration in system_settings.
@@ -71,19 +73,36 @@ func (s *Store) SetModel(ctx context.Context, model string, updatedBy uuid.UUID)
}
func (s *Store) GetGroundingEnabled(ctx context.Context) (bool, error) {
v, err := s.getText(ctx, keyGroundingEnabled, "true")
if err != nil {
return true, err
}
return v != "false", nil
return s.getBool(ctx, keyGroundingEnabled, true)
}
func (s *Store) SetGroundingEnabled(ctx context.Context, enabled bool, updatedBy uuid.UUID) error {
v := "false"
if enabled {
v = "true"
return s.setBool(ctx, keyGroundingEnabled, enabled, updatedBy)
}
// GetThinkingEnabled reports whether model reasoning is enabled. When false, the
// provider sends ThinkingConfig.ThinkingBudget=0 to disable thinking.
// Default is true (preserves the SDK default of dynamic thinking).
func (s *Store) GetThinkingEnabled(ctx context.Context) (bool, error) {
return s.getBool(ctx, keyThinkingEnabled, true)
}
func (s *Store) SetThinkingEnabled(ctx context.Context, enabled bool, updatedBy uuid.UUID) error {
return s.setBool(ctx, keyThinkingEnabled, enabled, updatedBy)
}
func (s *Store) getBool(ctx context.Context, key string, fallback bool) (bool, error) {
v, err := s.getText(ctx, key, strconv.FormatBool(fallback))
if err != nil {
return fallback, err
}
return s.setText(ctx, keyGroundingEnabled, v, updatedBy)
if b, parseErr := strconv.ParseBool(v); parseErr == nil {
return b, nil
}
return fallback, nil
}
func (s *Store) setBool(ctx context.Context, key string, enabled bool, updatedBy uuid.UUID) error {
return s.setText(ctx, key, strconv.FormatBool(enabled), updatedBy)
}
func (s *Store) getText(ctx context.Context, key, fallback string) (string, error) {

View File

@@ -31,10 +31,10 @@ func (r *UsageRepo) Record(ctx context.Context, e ai.UsageEvent) error {
}
_, err := r.db.Exec(ctx, `
INSERT INTO ai_usage
(provider, model, call_type, input_tokens, output_tokens,
(provider, model, call_type, input_tokens, output_tokens, thinking_tokens,
grounded, duration_ms, estimated_cost_usd, error, prompt_version)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
`, e.Provider, e.Model, e.CallType, e.InputTokens, e.OutputTokens,
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)
`, e.Provider, e.Model, e.CallType, e.InputTokens, e.OutputTokens, e.ThinkingTokens,
e.Grounded, e.DurationMs, e.EstimatedCostUSD, errStr, promptVersion)
if err != nil {
return fmt.Errorf("usage: record: %w", err)
@@ -47,6 +47,7 @@ type UsageStats struct {
Calls int `json:"calls"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
ThinkingTokens int `json:"thinking_tokens"`
GroundingCalls int `json:"grounding_calls"`
EstimatedCostUSD float64 `json:"estimated_cost_usd"`
}
@@ -71,16 +72,17 @@ func (r *UsageRepo) GroundingToday(ctx context.Context) (int, error) {
func (r *UsageRepo) statsWindow(ctx context.Context, interval string) (UsageStats, error) {
row := r.db.QueryRow(ctx, fmt.Sprintf(`
SELECT
COUNT(*) AS calls,
COALESCE(SUM(input_tokens),0) AS input_tokens,
COALESCE(SUM(output_tokens),0) AS output_tokens,
COUNT(*) AS calls,
COALESCE(SUM(input_tokens),0) AS input_tokens,
COALESCE(SUM(output_tokens),0) AS output_tokens,
COALESCE(SUM(thinking_tokens),0) AS thinking_tokens,
COALESCE(SUM(CASE WHEN grounded THEN 1 ELSE 0 END),0) AS grounding_calls,
COALESCE(SUM(estimated_cost_usd),0) AS cost
FROM ai_usage
WHERE created_at >= now() - INTERVAL '%s'
`, interval))
var s UsageStats
if err := row.Scan(&s.Calls, &s.InputTokens, &s.OutputTokens, &s.GroundingCalls, &s.EstimatedCostUSD); err != nil {
if err := row.Scan(&s.Calls, &s.InputTokens, &s.OutputTokens, &s.ThinkingTokens, &s.GroundingCalls, &s.EstimatedCostUSD); err != nil {
return s, fmt.Errorf("usage: stats(%s): %w", interval, err)
}
return s, nil
@@ -95,6 +97,7 @@ type UsageEvent struct {
CallType string `json:"call_type"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
ThinkingTokens int `json:"thinking_tokens"`
Grounded bool `json:"grounded"`
DurationMs int `json:"duration_ms"`
EstimatedCostUSD float64 `json:"estimated_cost_usd"`
@@ -105,7 +108,7 @@ type UsageEvent struct {
func (r *UsageRepo) Recent(ctx context.Context, limit int) ([]UsageEvent, error) {
rows, err := r.db.Query(ctx, `
SELECT id, created_at, provider, model, call_type,
input_tokens, output_tokens, grounded, duration_ms,
input_tokens, output_tokens, thinking_tokens, grounded, duration_ms,
estimated_cost_usd, error, prompt_version
FROM ai_usage
ORDER BY created_at DESC
@@ -120,7 +123,7 @@ func (r *UsageRepo) Recent(ctx context.Context, limit int) ([]UsageEvent, error)
for rows.Next() {
var e UsageEvent
if err := rows.Scan(&e.ID, &e.CreatedAt, &e.Provider, &e.Model, &e.CallType,
&e.InputTokens, &e.OutputTokens, &e.Grounded, &e.DurationMs,
&e.InputTokens, &e.OutputTokens, &e.ThinkingTokens, &e.Grounded, &e.DurationMs,
&e.EstimatedCostUSD, &e.Error, &e.PromptVersion); err != nil {
return nil, fmt.Errorf("usage: scan: %w", err)
}

View File

@@ -11,6 +11,7 @@ import (
type KeySource interface {
GetGeminiAPIKey(ctx context.Context) (string, error)
GetModel(ctx context.Context) (string, error)
GetThinkingEnabled(ctx context.Context) (bool, error)
}
// NewFromConfig creates a GeminiProvider. It reads the API key from store first;
@@ -31,11 +32,22 @@ func NewFromConfig(ctx context.Context, cfg config.AIConfig, store KeySource, re
model = "gemini-2.5-flash-lite"
}
if apiKey == "" {
// No key available. Return an unconfigured provider that will fail on use,
// but allows the server to start so the operator can configure the key via UI.
return newUnconfiguredGeminiProvider(model, recorder), nil
thinking, terr := store.GetThinkingEnabled(ctx)
if terr != nil {
slog.Warn("ai: could not read thinking setting; defaulting to enabled", "error", terr)
thinking = true
}
return NewGeminiProvider(ctx, apiKey, model, recorder)
if apiKey == "" {
p := newUnconfiguredGeminiProvider(model, recorder)
p.SetThinkingEnabled(thinking)
return p, nil
}
p, err := NewGeminiProvider(ctx, apiKey, model, recorder)
if err != nil {
return nil, err
}
p.SetThinkingEnabled(thinking)
return p, nil
}

View File

@@ -101,13 +101,11 @@ func filterCompatibleModels(items []*genai.Model) []ModelInfo {
return out
}
// Gemini API pricing (as of 2026-04). Refresh constants when pricing changes.
// Gemini grounding pricing (as of 2026-04). Per-model token rates live in geminiPricing.
// https://ai.google.dev/gemini-api/docs/pricing
const (
geminiInputCostPerToken = 0.10 / 1_000_000 // $0.10 / 1M tokens
geminiOutputCostPerToken = 0.40 / 1_000_000 // $0.40 / 1M tokens
geminiGroundingCostPer1k = 35.0 / 1_000 // $35 / 1k grounded prompts (above free tier)
geminiGroundingFreeDaily = 1_500 // daily free grounding requests
geminiGroundingCostPer1k = 35.0 / 1_000 // $35 / 1k grounded prompts (above free tier)
geminiGroundingFreeDaily = 1_500 // daily free grounding requests
)
type GeminiProvider struct {
@@ -116,6 +114,11 @@ type GeminiProvider struct {
model string
recorder UsageRecorder
// thinkingEnabled mirrors the persisted setting. When false, Chat() sets
// ThinkingConfig.ThinkingBudget=0 to disable reasoning on capable models.
// Default true preserves the SDK default of dynamic thinking.
thinkingEnabled bool
// groundingCallsToday is an in-process counter used for cost estimation only.
// It is not persisted and resets on restart. The authoritative count lives in ai_usage.
groundingCallsToday int
@@ -126,9 +129,10 @@ type GeminiProvider struct {
// All Chat calls return ErrInternal until Reinitialize is called.
func newUnconfiguredGeminiProvider(model string, recorder UsageRecorder) *GeminiProvider {
return &GeminiProvider{
model: model,
recorder: recorder,
groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
model: model,
recorder: recorder,
thinkingEnabled: true,
groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
}
}
@@ -157,10 +161,11 @@ func NewGeminiProvider(ctx context.Context, apiKey, model string, recorder Usage
return nil, fmt.Errorf("gemini: new client: %w", err)
}
return &GeminiProvider{
client: client,
model: model,
recorder: recorder,
groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
client: client,
model: model,
recorder: recorder,
thinkingEnabled: true,
groundingDate: time.Now().UTC().Truncate(24 * time.Hour),
}, nil
}
@@ -181,6 +186,18 @@ func (p *GeminiProvider) SetModel(model string) {
p.model = model
}
func (p *GeminiProvider) ThinkingEnabled() bool {
p.mu.RLock()
defer p.mu.RUnlock()
return p.thinkingEnabled
}
func (p *GeminiProvider) SetThinkingEnabled(enabled bool) {
p.mu.Lock()
defer p.mu.Unlock()
p.thinkingEnabled = enabled
}
func (p *GeminiProvider) ListModels(ctx context.Context) ([]ModelInfo, error) {
p.mu.RLock()
client := p.client
@@ -244,6 +261,13 @@ func (p *GeminiProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatRespo
}
}
// Disable thinking for thinking-capable models when the operator has opted out.
// SDK default (no ThinkingConfig) keeps dynamic thinking on.
if !p.ThinkingEnabled() {
zero := int32(0)
cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingBudget: &zero}
}
resp, err := client.Models.GenerateContent(ctx, model,
genai.Text(req.UserMessage), cfg)
@@ -303,14 +327,28 @@ func (p *GeminiProvider) buildUsageEvent(model string, req *ChatRequest, resp *g
if resp != nil && resp.UsageMetadata != nil {
e.InputTokens = int(resp.UsageMetadata.PromptTokenCount)
e.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount)
e.ThinkingTokens = int(resp.UsageMetadata.ThoughtsTokenCount)
}
e.EstimatedCostUSD = p.estimateCost(e.InputTokens, e.OutputTokens, req.Grounded)
// Aliases like "gemini-pro-latest" don't match priceFor; the resolved name from
// the response (e.g. "gemini-2.5-pro-002") does. Prefer it when present.
pricingModel := model
if resp != nil && resp.ModelVersion != "" {
pricingModel = resp.ModelVersion
}
e.EstimatedCostUSD = p.estimateCost(pricingModel, e.InputTokens, e.OutputTokens+e.ThinkingTokens, req.Grounded)
return e
}
func (p *GeminiProvider) estimateCost(inputTokens, outputTokens int, grounded bool) float64 {
cost := float64(inputTokens)*geminiInputCostPerToken +
float64(outputTokens)*geminiOutputCostPerToken
// estimateCost returns USD for the given token counts at the model's published rate.
// outputTokens should already include any thinking tokens (Gemini bills thoughts at the output rate).
// TODO: handle the >200K input tier for 2.5-pro / 3.1-pro if prompts ever exceed that.
func (p *GeminiProvider) estimateCost(model string, inputTokens, outputTokens int, grounded bool) float64 {
inUSDPerM, outUSDPerM := priceFor(model)
cost := float64(inputTokens)*inUSDPerM/1_000_000 +
float64(outputTokens)*outUSDPerM/1_000_000
if inUSDPerM == 0 && outUSDPerM == 0 && (inputTokens > 0 || outputTokens > 0) && model != "" {
slog.Warn("ai: unknown model for pricing — estimated cost is 0", "model", model)
}
if grounded {
p.mu.Lock()
today := time.Now().UTC().Truncate(24 * time.Hour)

View File

@@ -156,6 +156,96 @@ func TestPriceFor_UnknownReturnsZero(t *testing.T) {
}
}
func TestThinkingEnabled_DefaultsTrueAndIsTogglable(t *testing.T) {
p := newUnconfiguredGeminiProvider("gemini-2.5-pro", nil)
if !p.ThinkingEnabled() {
t.Errorf("default ThinkingEnabled = false; want true (preserves SDK default)")
}
p.SetThinkingEnabled(false)
if p.ThinkingEnabled() {
t.Errorf("after SetThinkingEnabled(false), still true")
}
p.SetThinkingEnabled(true)
if !p.ThinkingEnabled() {
t.Errorf("after SetThinkingEnabled(true), still false")
}
}
func TestEstimateCost_UsesPriceForModel(t *testing.T) {
p := &GeminiProvider{}
cases := []struct {
name string
model string
inputTokens int
outputTokens int
wantUSD float64
}{
{"2.5-pro 1M+1M", "gemini-2.5-pro", 1_000_000, 1_000_000, 1.25 + 10.00},
{"2.5-flash 1M+1M", "gemini-2.5-flash", 1_000_000, 1_000_000, 0.30 + 2.50},
{"2.5-flash-lite 1M+1M", "gemini-2.5-flash-lite", 1_000_000, 1_000_000, 0.10 + 0.40},
{"3.1-pro 1M+1M", "gemini-3.1-pro", 1_000_000, 1_000_000, 2.00 + 12.00},
{"resolved alias 2.5-pro-002", "gemini-2.5-pro-002", 1000, 500, 1000*1.25/1_000_000 + 500*10.0/1_000_000},
{"unknown returns zero", "gemini-pro-latest", 1000, 1000, 0},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got := p.estimateCost(tc.model, tc.inputTokens, tc.outputTokens, false)
if got != tc.wantUSD {
t.Errorf("estimateCost(%q, %d, %d) = %v; want %v",
tc.model, tc.inputTokens, tc.outputTokens, got, tc.wantUSD)
}
})
}
}
func TestBuildUsageEvent_PrefersResolvedModelVersion(t *testing.T) {
p := &GeminiProvider{}
resp := &genai.GenerateContentResponse{
ModelVersion: "gemini-2.5-pro-002",
UsageMetadata: &genai.GenerateContentResponseUsageMetadata{
PromptTokenCount: 1000,
CandidatesTokenCount: 500,
},
}
req := &ChatRequest{CallType: "research"}
// Caller passes the alias; resolved name from response should drive pricing.
e := p.buildUsageEvent("gemini-pro-latest", req, resp, nil, 100)
wantCost := float64(1000)*1.25/1_000_000 + float64(500)*10.0/1_000_000
if e.EstimatedCostUSD != wantCost {
t.Errorf("EstimatedCostUSD = %v; want %v (resolved model should price as 2.5-pro)",
e.EstimatedCostUSD, wantCost)
}
}
func TestBuildUsageEvent_BillsThoughtsTokens(t *testing.T) {
p := &GeminiProvider{}
resp := &genai.GenerateContentResponse{
ModelVersion: "gemini-2.5-pro",
UsageMetadata: &genai.GenerateContentResponseUsageMetadata{
PromptTokenCount: 1000,
CandidatesTokenCount: 500,
ThoughtsTokenCount: 200,
},
}
req := &ChatRequest{CallType: "research"}
e := p.buildUsageEvent("gemini-2.5-pro", req, resp, nil, 100)
if e.ThinkingTokens != 200 {
t.Errorf("ThinkingTokens = %d; want 200", e.ThinkingTokens)
}
if e.OutputTokens != 500 {
t.Errorf("OutputTokens = %d; want 500 (candidates only, thoughts tracked separately)", e.OutputTokens)
}
// Cost: input @ 1.25/1M, (output + thoughts) @ 10/1M
wantCost := float64(1000)*1.25/1_000_000 + float64(500+200)*10.0/1_000_000
if e.EstimatedCostUSD != wantCost {
t.Errorf("EstimatedCostUSD = %v; want %v (thoughts billed at output rate)",
e.EstimatedCostUSD, wantCost)
}
}
func modelNames(ms []ModelInfo) []string {
names := make([]string, len(ms))
for i, m := range ms {

View File

@@ -3,12 +3,17 @@ package ai
import "context"
// UsageEvent holds per-call telemetry recorded after each LLM call.
//
// OutputTokens holds visible response tokens (CandidatesTokenCount).
// ThinkingTokens holds reasoning tokens (ThoughtsTokenCount), tracked separately
// for visibility but billed at the output rate by Gemini.
type UsageEvent struct {
Provider string
Model string
CallType string
InputTokens int
OutputTokens int
ThinkingTokens int
Grounded bool
DurationMs int
EstimatedCostUSD float64

View File

@@ -0,0 +1 @@
ALTER TABLE ai_usage DROP COLUMN thinking_tokens;

View File

@@ -0,0 +1,2 @@
ALTER TABLE ai_usage
ADD COLUMN thinking_tokens INT NOT NULL DEFAULT 0;

View File

@@ -204,6 +204,7 @@ export interface AIUsageStats {
calls: number;
input_tokens: number;
output_tokens: number;
thinking_tokens?: number;
grounding_calls: number;
estimated_cost_usd: number;
}
@@ -216,6 +217,7 @@ export interface AIUsageEvent {
call_type: string;
input_tokens: number;
output_tokens: number;
thinking_tokens?: number;
grounded: boolean;
duration_ms: number;
estimated_cost_usd: number;
@@ -240,6 +242,7 @@ export interface AIStatus {
api_key_fingerprint?: string;
grounding_enabled: boolean;
grounding_quota: number;
thinking_enabled: boolean;
usage: {
today: AIUsageStats;
month: AIUsageStats;

View File

@@ -67,5 +67,20 @@ export const actions: Actions = {
} catch (err) {
return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
}
},
setThinking: async ({ cookies, fetch, request }) => {
const data = await request.formData();
const enabled = data.get('enabled') === 'true';
try {
await serverFetch('/admin/settings/ai/thinking', cookies, {
method: 'POST',
body: JSON.stringify({ enabled }),
fetch
});
return { success: true, action: 'thinking', enabled };
} catch (err) {
return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' });
}
}
};

View File

@@ -14,6 +14,7 @@
let saving = $state(false);
let showKeyInput = $state(untrack(() => !data.ai?.api_key_fingerprint));
let groundingEnabled = $state(untrack(() => data.ai?.grounding_enabled ?? false));
let thinkingEnabled = $state(untrack(() => data.ai?.thinking_enabled ?? true));
let activeModel = $derived(
form?.success && form.action === 'model' && form.model ? form.model : (data.ai?.model ?? '')
@@ -266,6 +267,59 @@
</div>
</div>
<!-- Card 3b: Thinking (Reasoning) -->
<div
class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900"
>
<div class="border-b border-stone-200 px-6 py-4 dark:border-stone-700">
<h2 class="text-base font-semibold text-stone-900 dark:text-stone-100">
Modell-Reasoning (Thinking)
</h2>
</div>
<div class="px-6 py-4">
<div class="flex items-center justify-between">
<div>
<p class="text-sm text-stone-700 dark:text-stone-300">
Thinking-Token bei kompatiblen Modellen erlauben
</p>
<p class="mt-0.5 text-xs text-stone-400">
Aus = günstiger, schneller. An = höhere Antwortqualität bei komplexen Aufgaben.
</p>
</div>
<form
method="POST"
action="?/setThinking"
use:enhance={() => {
return async ({ update }) => {
await update();
};
}}
>
<input type="hidden" name="enabled" value={thinkingEnabled ? 'false' : 'true'} />
<button
type="submit"
onclick={() => (thinkingEnabled = !thinkingEnabled)}
aria-label={thinkingEnabled ? 'Thinking deaktivieren' : 'Thinking aktivieren'}
class="relative inline-flex h-6 w-11 items-center rounded-full transition-colors {thinkingEnabled
? 'bg-primary-600'
: 'bg-stone-300 dark:bg-stone-600'}"
>
<span
class="inline-block h-4 w-4 transform rounded-full bg-white shadow transition-transform {thinkingEnabled
? 'translate-x-6'
: 'translate-x-1'}"
></span>
</button>
</form>
</div>
{#if form?.success && form.action === 'thinking'}
<p class="mt-2 text-xs text-green-600 dark:text-green-400">
Thinking {form.enabled ? 'aktiviert' : 'deaktiviert'}.
</p>
{/if}
</div>
</div>
<!-- Card 4: Usage -->
<div
class="rounded-lg border border-stone-200 bg-white dark:border-stone-700 dark:bg-stone-900"
@@ -276,7 +330,7 @@
<div class="space-y-4 px-6 py-4">
<!-- Rollup stats -->
<div class="grid grid-cols-2 gap-4 sm:grid-cols-4">
{#each [{ label: 'Anfragen heute', value: data.ai.usage.today.calls.toString() }, { label: 'Tokens heute', value: (data.ai.usage.today.input_tokens + data.ai.usage.today.output_tokens).toLocaleString('de-DE') }, { label: 'Kosten heute', value: formatCost(data.ai.usage.today.estimated_cost_usd) }, { label: 'Kosten (30 Tage)', value: formatCost(data.ai.usage.month.estimated_cost_usd) }] as stat}
{#each [{ label: 'Anfragen heute', value: data.ai.usage.today.calls.toString() }, { label: 'Tokens heute', value: (data.ai.usage.today.input_tokens + data.ai.usage.today.output_tokens + (data.ai.usage.today.thinking_tokens ?? 0)).toLocaleString('de-DE') }, { label: 'Kosten heute', value: formatCost(data.ai.usage.today.estimated_cost_usd) }, { label: 'Kosten (30 Tage)', value: formatCost(data.ai.usage.month.estimated_cost_usd) }] as stat}
<div class="rounded-md bg-stone-50 px-3 py-2 dark:bg-stone-800">
<p class="text-xs text-stone-400">{stat.label}</p>
<p class="mt-0.5 text-sm font-semibold text-stone-800 dark:text-stone-200">
@@ -311,7 +365,11 @@
<td class="py-1.5 pr-4">{event.call_type}</td>
<td class="max-w-32 truncate py-1.5 pr-4 font-mono">{event.model}</td>
<td class="py-1.5 pr-4"
>{(event.input_tokens + event.output_tokens).toLocaleString('de-DE')}</td
>{(
event.input_tokens +
event.output_tokens +
(event.thinking_tokens ?? 0)
).toLocaleString('de-DE')}</td
>
<td class="py-1.5 pr-4">{event.grounded ? '✓' : '—'}</td>
<td class="py-1.5">{formatCost(event.estimated_cost_usd)}</td>