diff --git a/backend/internal/domain/settings/handler.go b/backend/internal/domain/settings/handler.go index 949c4ae..76f1767 100644 --- a/backend/internal/domain/settings/handler.go +++ b/backend/internal/domain/settings/handler.go @@ -19,6 +19,7 @@ type AIStatus struct { APIKeyFingerprint string `json:"api_key_fingerprint,omitempty"` GroundingEnabled bool `json:"grounding_enabled"` GroundingQuota int `json:"grounding_quota"` + ThinkingEnabled bool `json:"thinking_enabled"` Usage UsageSummary `json:"usage"` } @@ -55,6 +56,7 @@ func (h *Handler) GetAI(c *gin.Context) { } grounding, _ := h.store.GetGroundingEnabled(ctx) + thinking, _ := h.store.GetThinkingEnabled(ctx) today, _ := h.usageRepo.Today(ctx) month, _ := h.usageRepo.Month(ctx) @@ -68,6 +70,7 @@ func (h *Handler) GetAI(c *gin.Context) { APIKeyFingerprint: fingerprint, GroundingEnabled: grounding, GroundingQuota: 1500, + ThinkingEnabled: thinking, Usage: UsageSummary{ Today: today, Month: month, @@ -150,6 +153,24 @@ func (h *Handler) SetGrounding(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"data": gin.H{"grounding_enabled": req.Enabled}}) } +func (h *Handler) SetThinking(c *gin.Context) { + ctx := c.Request.Context() + var req struct { + Enabled bool `json:"enabled"` + } + if err := c.ShouldBindJSON(&req); err != nil { + c.JSON(http.StatusBadRequest, gin.H{"error": "enabled is required"}) + return + } + userID := callerID(c) + if err := h.store.SetThinkingEnabled(ctx, req.Enabled, userID); err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save thinking setting"}) + return + } + h.provider.SetThinkingEnabled(req.Enabled) + c.JSON(http.StatusOK, gin.H{"data": gin.H{"thinking_enabled": req.Enabled}}) +} + func (h *Handler) GetUsage(c *gin.Context) { ctx := c.Request.Context() limit := 50 diff --git a/backend/internal/domain/settings/routes.go b/backend/internal/domain/settings/routes.go index 1ddc6b5..3d2b29c 100644 --- a/backend/internal/domain/settings/routes.go +++ b/backend/internal/domain/settings/routes.go @@ -8,5 +8,6 @@ func RegisterRoutes(rg *gin.RouterGroup, h *Handler, requireAuth, requireAdmin g admin.POST("/settings/ai/model", h.SetModel) admin.POST("/settings/ai/key", h.SetAPIKey) admin.POST("/settings/ai/grounding", h.SetGrounding) + admin.POST("/settings/ai/thinking", h.SetThinking) admin.GET("/settings/ai/usage", h.GetUsage) } diff --git a/backend/internal/domain/settings/store.go b/backend/internal/domain/settings/store.go index bbb8427..146a890 100644 --- a/backend/internal/domain/settings/store.go +++ b/backend/internal/domain/settings/store.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "strconv" "github.com/google/uuid" "github.com/jackc/pgx/v5" @@ -16,6 +17,7 @@ const ( keyAPIKey = "gemini.api_key" keyModel = "gemini.model" keyGroundingEnabled = "gemini.grounding_enabled" + keyThinkingEnabled = "gemini.thinking_enabled" ) // Store persists AI provider configuration in system_settings. @@ -71,19 +73,36 @@ func (s *Store) SetModel(ctx context.Context, model string, updatedBy uuid.UUID) } func (s *Store) GetGroundingEnabled(ctx context.Context) (bool, error) { - v, err := s.getText(ctx, keyGroundingEnabled, "true") - if err != nil { - return true, err - } - return v != "false", nil + return s.getBool(ctx, keyGroundingEnabled, true) } func (s *Store) SetGroundingEnabled(ctx context.Context, enabled bool, updatedBy uuid.UUID) error { - v := "false" - if enabled { - v = "true" + return s.setBool(ctx, keyGroundingEnabled, enabled, updatedBy) +} + +// GetThinkingEnabled controls whether the provider sends ThinkingConfig.ThinkingBudget=0 +// to disable model reasoning. Default is true (preserves SDK default of dynamic thinking). +func (s *Store) GetThinkingEnabled(ctx context.Context) (bool, error) { + return s.getBool(ctx, keyThinkingEnabled, true) +} + +func (s *Store) SetThinkingEnabled(ctx context.Context, enabled bool, updatedBy uuid.UUID) error { + return s.setBool(ctx, keyThinkingEnabled, enabled, updatedBy) +} + +func (s *Store) getBool(ctx context.Context, key string, fallback bool) (bool, error) { + v, err := s.getText(ctx, key, strconv.FormatBool(fallback)) + if err != nil { + return fallback, err } - return s.setText(ctx, keyGroundingEnabled, v, updatedBy) + if b, parseErr := strconv.ParseBool(v); parseErr == nil { + return b, nil + } + return fallback, nil +} + +func (s *Store) setBool(ctx context.Context, key string, enabled bool, updatedBy uuid.UUID) error { + return s.setText(ctx, key, strconv.FormatBool(enabled), updatedBy) } func (s *Store) getText(ctx context.Context, key, fallback string) (string, error) { diff --git a/backend/internal/domain/settings/usage.go b/backend/internal/domain/settings/usage.go index 9939ecd..2967188 100644 --- a/backend/internal/domain/settings/usage.go +++ b/backend/internal/domain/settings/usage.go @@ -31,10 +31,10 @@ func (r *UsageRepo) Record(ctx context.Context, e ai.UsageEvent) error { } _, err := r.db.Exec(ctx, ` INSERT INTO ai_usage - (provider, model, call_type, input_tokens, output_tokens, + (provider, model, call_type, input_tokens, output_tokens, thinking_tokens, grounded, duration_ms, estimated_cost_usd, error, prompt_version) - VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10) - `, e.Provider, e.Model, e.CallType, e.InputTokens, e.OutputTokens, + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) + `, e.Provider, e.Model, e.CallType, e.InputTokens, e.OutputTokens, e.ThinkingTokens, e.Grounded, e.DurationMs, e.EstimatedCostUSD, errStr, promptVersion) if err != nil { return fmt.Errorf("usage: record: %w", err) @@ -47,6 +47,7 @@ type UsageStats struct { Calls int `json:"calls"` InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` + ThinkingTokens int `json:"thinking_tokens"` GroundingCalls int `json:"grounding_calls"` EstimatedCostUSD float64 `json:"estimated_cost_usd"` } @@ -71,16 +72,17 @@ func (r *UsageRepo) GroundingToday(ctx context.Context) (int, error) { func (r *UsageRepo) statsWindow(ctx context.Context, interval string) (UsageStats, error) { row := r.db.QueryRow(ctx, fmt.Sprintf(` SELECT - COUNT(*) AS calls, - COALESCE(SUM(input_tokens),0) AS input_tokens, - COALESCE(SUM(output_tokens),0) AS output_tokens, + COUNT(*) AS calls, + COALESCE(SUM(input_tokens),0) AS input_tokens, + COALESCE(SUM(output_tokens),0) AS output_tokens, + COALESCE(SUM(thinking_tokens),0) AS thinking_tokens, COALESCE(SUM(CASE WHEN grounded THEN 1 ELSE 0 END),0) AS grounding_calls, COALESCE(SUM(estimated_cost_usd),0) AS cost FROM ai_usage WHERE created_at >= now() - INTERVAL '%s' `, interval)) var s UsageStats - if err := row.Scan(&s.Calls, &s.InputTokens, &s.OutputTokens, &s.GroundingCalls, &s.EstimatedCostUSD); err != nil { + if err := row.Scan(&s.Calls, &s.InputTokens, &s.OutputTokens, &s.ThinkingTokens, &s.GroundingCalls, &s.EstimatedCostUSD); err != nil { return s, fmt.Errorf("usage: stats(%s): %w", interval, err) } return s, nil @@ -95,6 +97,7 @@ type UsageEvent struct { CallType string `json:"call_type"` InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` + ThinkingTokens int `json:"thinking_tokens"` Grounded bool `json:"grounded"` DurationMs int `json:"duration_ms"` EstimatedCostUSD float64 `json:"estimated_cost_usd"` @@ -105,7 +108,7 @@ type UsageEvent struct { func (r *UsageRepo) Recent(ctx context.Context, limit int) ([]UsageEvent, error) { rows, err := r.db.Query(ctx, ` SELECT id, created_at, provider, model, call_type, - input_tokens, output_tokens, grounded, duration_ms, + input_tokens, output_tokens, thinking_tokens, grounded, duration_ms, estimated_cost_usd, error, prompt_version FROM ai_usage ORDER BY created_at DESC @@ -120,7 +123,7 @@ func (r *UsageRepo) Recent(ctx context.Context, limit int) ([]UsageEvent, error) for rows.Next() { var e UsageEvent if err := rows.Scan(&e.ID, &e.CreatedAt, &e.Provider, &e.Model, &e.CallType, - &e.InputTokens, &e.OutputTokens, &e.Grounded, &e.DurationMs, + &e.InputTokens, &e.OutputTokens, &e.ThinkingTokens, &e.Grounded, &e.DurationMs, &e.EstimatedCostUSD, &e.Error, &e.PromptVersion); err != nil { return nil, fmt.Errorf("usage: scan: %w", err) } diff --git a/backend/internal/pkg/ai/factory.go b/backend/internal/pkg/ai/factory.go index ece2fb2..0a3c82d 100644 --- a/backend/internal/pkg/ai/factory.go +++ b/backend/internal/pkg/ai/factory.go @@ -11,6 +11,7 @@ import ( type KeySource interface { GetGeminiAPIKey(ctx context.Context) (string, error) GetModel(ctx context.Context) (string, error) + GetThinkingEnabled(ctx context.Context) (bool, error) } // NewFromConfig creates a GeminiProvider. It reads the API key from store first; @@ -31,11 +32,22 @@ func NewFromConfig(ctx context.Context, cfg config.AIConfig, store KeySource, re model = "gemini-2.5-flash-lite" } - if apiKey == "" { - // No key available. Return an unconfigured provider that will fail on use, - // but allows the server to start so the operator can configure the key via UI. - return newUnconfiguredGeminiProvider(model, recorder), nil + thinking, terr := store.GetThinkingEnabled(ctx) + if terr != nil { + slog.Warn("ai: could not read thinking setting; defaulting to enabled", "error", terr) + thinking = true } - return NewGeminiProvider(ctx, apiKey, model, recorder) + if apiKey == "" { + p := newUnconfiguredGeminiProvider(model, recorder) + p.SetThinkingEnabled(thinking) + return p, nil + } + + p, err := NewGeminiProvider(ctx, apiKey, model, recorder) + if err != nil { + return nil, err + } + p.SetThinkingEnabled(thinking) + return p, nil } diff --git a/backend/internal/pkg/ai/gemini.go b/backend/internal/pkg/ai/gemini.go index a46cf78..63aa6a8 100644 --- a/backend/internal/pkg/ai/gemini.go +++ b/backend/internal/pkg/ai/gemini.go @@ -101,13 +101,11 @@ func filterCompatibleModels(items []*genai.Model) []ModelInfo { return out } -// Gemini API pricing (as of 2026-04). Refresh constants when pricing changes. +// Gemini grounding pricing (as of 2026-04). Per-model token rates live in geminiPricing. // https://ai.google.dev/gemini-api/docs/pricing const ( - geminiInputCostPerToken = 0.10 / 1_000_000 // $0.10 / 1M tokens - geminiOutputCostPerToken = 0.40 / 1_000_000 // $0.40 / 1M tokens - geminiGroundingCostPer1k = 35.0 / 1_000 // $35 / 1k grounded prompts (above free tier) - geminiGroundingFreeDaily = 1_500 // daily free grounding requests + geminiGroundingCostPer1k = 35.0 / 1_000 // $35 / 1k grounded prompts (above free tier) + geminiGroundingFreeDaily = 1_500 // daily free grounding requests ) type GeminiProvider struct { @@ -116,6 +114,11 @@ type GeminiProvider struct { model string recorder UsageRecorder + // thinkingEnabled mirrors the persisted setting. When false, Chat() sets + // ThinkingConfig.ThinkingBudget=0 to disable reasoning on capable models. + // Default true preserves the SDK default of dynamic thinking. + thinkingEnabled bool + // groundingCallsToday is an in-process counter used for cost estimation only. // It is not persisted and resets on restart. The authoritative count lives in ai_usage. groundingCallsToday int @@ -126,9 +129,10 @@ type GeminiProvider struct { // All Chat calls return ErrInternal until Reinitialize is called. func newUnconfiguredGeminiProvider(model string, recorder UsageRecorder) *GeminiProvider { return &GeminiProvider{ - model: model, - recorder: recorder, - groundingDate: time.Now().UTC().Truncate(24 * time.Hour), + model: model, + recorder: recorder, + thinkingEnabled: true, + groundingDate: time.Now().UTC().Truncate(24 * time.Hour), } } @@ -157,10 +161,11 @@ func NewGeminiProvider(ctx context.Context, apiKey, model string, recorder Usage return nil, fmt.Errorf("gemini: new client: %w", err) } return &GeminiProvider{ - client: client, - model: model, - recorder: recorder, - groundingDate: time.Now().UTC().Truncate(24 * time.Hour), + client: client, + model: model, + recorder: recorder, + thinkingEnabled: true, + groundingDate: time.Now().UTC().Truncate(24 * time.Hour), }, nil } @@ -181,6 +186,18 @@ func (p *GeminiProvider) SetModel(model string) { p.model = model } +func (p *GeminiProvider) ThinkingEnabled() bool { + p.mu.RLock() + defer p.mu.RUnlock() + return p.thinkingEnabled +} + +func (p *GeminiProvider) SetThinkingEnabled(enabled bool) { + p.mu.Lock() + defer p.mu.Unlock() + p.thinkingEnabled = enabled +} + func (p *GeminiProvider) ListModels(ctx context.Context) ([]ModelInfo, error) { p.mu.RLock() client := p.client @@ -244,6 +261,13 @@ func (p *GeminiProvider) Chat(ctx context.Context, req *ChatRequest) (*ChatRespo } } + // Disable thinking for thinking-capable models when the operator has opted out. + // SDK default (no ThinkingConfig) keeps dynamic thinking on. + if !p.ThinkingEnabled() { + zero := int32(0) + cfg.ThinkingConfig = &genai.ThinkingConfig{ThinkingBudget: &zero} + } + resp, err := client.Models.GenerateContent(ctx, model, genai.Text(req.UserMessage), cfg) @@ -303,14 +327,28 @@ func (p *GeminiProvider) buildUsageEvent(model string, req *ChatRequest, resp *g if resp != nil && resp.UsageMetadata != nil { e.InputTokens = int(resp.UsageMetadata.PromptTokenCount) e.OutputTokens = int(resp.UsageMetadata.CandidatesTokenCount) + e.ThinkingTokens = int(resp.UsageMetadata.ThoughtsTokenCount) } - e.EstimatedCostUSD = p.estimateCost(e.InputTokens, e.OutputTokens, req.Grounded) + // Aliases like "gemini-pro-latest" don't match priceFor; the resolved name from + // the response (e.g. "gemini-2.5-pro-002") does. Prefer it when present. + pricingModel := model + if resp != nil && resp.ModelVersion != "" { + pricingModel = resp.ModelVersion + } + e.EstimatedCostUSD = p.estimateCost(pricingModel, e.InputTokens, e.OutputTokens+e.ThinkingTokens, req.Grounded) return e } -func (p *GeminiProvider) estimateCost(inputTokens, outputTokens int, grounded bool) float64 { - cost := float64(inputTokens)*geminiInputCostPerToken + - float64(outputTokens)*geminiOutputCostPerToken +// estimateCost returns USD for the given token counts at the model's published rate. +// outputTokens should already include any thinking tokens (Gemini bills thoughts at the output rate). +// TODO: handle the >200K input tier for 2.5-pro / 3.1-pro if prompts ever exceed that. +func (p *GeminiProvider) estimateCost(model string, inputTokens, outputTokens int, grounded bool) float64 { + inUSDPerM, outUSDPerM := priceFor(model) + cost := float64(inputTokens)*inUSDPerM/1_000_000 + + float64(outputTokens)*outUSDPerM/1_000_000 + if inUSDPerM == 0 && outUSDPerM == 0 && (inputTokens > 0 || outputTokens > 0) && model != "" { + slog.Warn("ai: unknown model for pricing — estimated cost is 0", "model", model) + } if grounded { p.mu.Lock() today := time.Now().UTC().Truncate(24 * time.Hour) diff --git a/backend/internal/pkg/ai/gemini_test.go b/backend/internal/pkg/ai/gemini_test.go index 067cd2a..2bd5c51 100644 --- a/backend/internal/pkg/ai/gemini_test.go +++ b/backend/internal/pkg/ai/gemini_test.go @@ -156,6 +156,96 @@ func TestPriceFor_UnknownReturnsZero(t *testing.T) { } } +func TestThinkingEnabled_DefaultsTrueAndIsTogglable(t *testing.T) { + p := newUnconfiguredGeminiProvider("gemini-2.5-pro", nil) + if !p.ThinkingEnabled() { + t.Errorf("default ThinkingEnabled = false; want true (preserves SDK default)") + } + p.SetThinkingEnabled(false) + if p.ThinkingEnabled() { + t.Errorf("after SetThinkingEnabled(false), still true") + } + p.SetThinkingEnabled(true) + if !p.ThinkingEnabled() { + t.Errorf("after SetThinkingEnabled(true), still false") + } +} + +func TestEstimateCost_UsesPriceForModel(t *testing.T) { + p := &GeminiProvider{} + cases := []struct { + name string + model string + inputTokens int + outputTokens int + wantUSD float64 + }{ + {"2.5-pro 1M+1M", "gemini-2.5-pro", 1_000_000, 1_000_000, 1.25 + 10.00}, + {"2.5-flash 1M+1M", "gemini-2.5-flash", 1_000_000, 1_000_000, 0.30 + 2.50}, + {"2.5-flash-lite 1M+1M", "gemini-2.5-flash-lite", 1_000_000, 1_000_000, 0.10 + 0.40}, + {"3.1-pro 1M+1M", "gemini-3.1-pro", 1_000_000, 1_000_000, 2.00 + 12.00}, + {"resolved alias 2.5-pro-002", "gemini-2.5-pro-002", 1000, 500, 1000*1.25/1_000_000 + 500*10.0/1_000_000}, + {"unknown returns zero", "gemini-pro-latest", 1000, 1000, 0}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := p.estimateCost(tc.model, tc.inputTokens, tc.outputTokens, false) + if got != tc.wantUSD { + t.Errorf("estimateCost(%q, %d, %d) = %v; want %v", + tc.model, tc.inputTokens, tc.outputTokens, got, tc.wantUSD) + } + }) + } +} + +func TestBuildUsageEvent_PrefersResolvedModelVersion(t *testing.T) { + p := &GeminiProvider{} + resp := &genai.GenerateContentResponse{ + ModelVersion: "gemini-2.5-pro-002", + UsageMetadata: &genai.GenerateContentResponseUsageMetadata{ + PromptTokenCount: 1000, + CandidatesTokenCount: 500, + }, + } + req := &ChatRequest{CallType: "research"} + + // Caller passes the alias; resolved name from response should drive pricing. + e := p.buildUsageEvent("gemini-pro-latest", req, resp, nil, 100) + + wantCost := float64(1000)*1.25/1_000_000 + float64(500)*10.0/1_000_000 + if e.EstimatedCostUSD != wantCost { + t.Errorf("EstimatedCostUSD = %v; want %v (resolved model should price as 2.5-pro)", + e.EstimatedCostUSD, wantCost) + } +} + +func TestBuildUsageEvent_BillsThoughtsTokens(t *testing.T) { + p := &GeminiProvider{} + resp := &genai.GenerateContentResponse{ + ModelVersion: "gemini-2.5-pro", + UsageMetadata: &genai.GenerateContentResponseUsageMetadata{ + PromptTokenCount: 1000, + CandidatesTokenCount: 500, + ThoughtsTokenCount: 200, + }, + } + req := &ChatRequest{CallType: "research"} + e := p.buildUsageEvent("gemini-2.5-pro", req, resp, nil, 100) + + if e.ThinkingTokens != 200 { + t.Errorf("ThinkingTokens = %d; want 200", e.ThinkingTokens) + } + if e.OutputTokens != 500 { + t.Errorf("OutputTokens = %d; want 500 (candidates only, thoughts tracked separately)", e.OutputTokens) + } + // Cost: input @ 1.25/1M, (output + thoughts) @ 10/1M + wantCost := float64(1000)*1.25/1_000_000 + float64(500+200)*10.0/1_000_000 + if e.EstimatedCostUSD != wantCost { + t.Errorf("EstimatedCostUSD = %v; want %v (thoughts billed at output rate)", + e.EstimatedCostUSD, wantCost) + } +} + func modelNames(ms []ModelInfo) []string { names := make([]string, len(ms)) for i, m := range ms { diff --git a/backend/internal/pkg/ai/usage.go b/backend/internal/pkg/ai/usage.go index b540d86..1de5e1c 100644 --- a/backend/internal/pkg/ai/usage.go +++ b/backend/internal/pkg/ai/usage.go @@ -3,12 +3,17 @@ package ai import "context" // UsageEvent holds per-call telemetry recorded after each LLM call. +// +// OutputTokens holds visible response tokens (CandidatesTokenCount). +// ThinkingTokens holds reasoning tokens (ThoughtsTokenCount), tracked separately +// for visibility but billed at the output rate by Gemini. type UsageEvent struct { Provider string Model string CallType string InputTokens int OutputTokens int + ThinkingTokens int Grounded bool DurationMs int EstimatedCostUSD float64 diff --git a/backend/migrations/000030_ai_usage_thinking_tokens.down.sql b/backend/migrations/000030_ai_usage_thinking_tokens.down.sql new file mode 100644 index 0000000..d575dfa --- /dev/null +++ b/backend/migrations/000030_ai_usage_thinking_tokens.down.sql @@ -0,0 +1 @@ +ALTER TABLE ai_usage DROP COLUMN thinking_tokens; diff --git a/backend/migrations/000030_ai_usage_thinking_tokens.up.sql b/backend/migrations/000030_ai_usage_thinking_tokens.up.sql new file mode 100644 index 0000000..ad54d3a --- /dev/null +++ b/backend/migrations/000030_ai_usage_thinking_tokens.up.sql @@ -0,0 +1,2 @@ +ALTER TABLE ai_usage + ADD COLUMN thinking_tokens INT NOT NULL DEFAULT 0; diff --git a/web/src/lib/api/types.ts b/web/src/lib/api/types.ts index a7fba81..eecfece 100644 --- a/web/src/lib/api/types.ts +++ b/web/src/lib/api/types.ts @@ -204,6 +204,7 @@ export interface AIUsageStats { calls: number; input_tokens: number; output_tokens: number; + thinking_tokens?: number; grounding_calls: number; estimated_cost_usd: number; } @@ -216,6 +217,7 @@ export interface AIUsageEvent { call_type: string; input_tokens: number; output_tokens: number; + thinking_tokens?: number; grounded: boolean; duration_ms: number; estimated_cost_usd: number; @@ -240,6 +242,7 @@ export interface AIStatus { api_key_fingerprint?: string; grounding_enabled: boolean; grounding_quota: number; + thinking_enabled: boolean; usage: { today: AIUsageStats; month: AIUsageStats; diff --git a/web/src/routes/admin/einstellungen/+page.server.ts b/web/src/routes/admin/einstellungen/+page.server.ts index 6cda5dc..32ddd1d 100644 --- a/web/src/routes/admin/einstellungen/+page.server.ts +++ b/web/src/routes/admin/einstellungen/+page.server.ts @@ -67,5 +67,20 @@ export const actions: Actions = { } catch (err) { return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' }); } + }, + + setThinking: async ({ cookies, fetch, request }) => { + const data = await request.formData(); + const enabled = data.get('enabled') === 'true'; + try { + await serverFetch('/admin/settings/ai/thinking', cookies, { + method: 'POST', + body: JSON.stringify({ enabled }), + fetch + }); + return { success: true, action: 'thinking', enabled }; + } catch (err) { + return fail(500, { error: err instanceof Error ? err.message : 'Fehler beim Speichern.' }); + } } }; diff --git a/web/src/routes/admin/einstellungen/+page.svelte b/web/src/routes/admin/einstellungen/+page.svelte index 8669d14..86b9479 100644 --- a/web/src/routes/admin/einstellungen/+page.svelte +++ b/web/src/routes/admin/einstellungen/+page.svelte @@ -14,6 +14,7 @@ let saving = $state(false); let showKeyInput = $state(untrack(() => !data.ai?.api_key_fingerprint)); let groundingEnabled = $state(untrack(() => data.ai?.grounding_enabled ?? false)); + let thinkingEnabled = $state(untrack(() => data.ai?.thinking_enabled ?? true)); let activeModel = $derived( form?.success && form.action === 'model' && form.model ? form.model : (data.ai?.model ?? '') @@ -266,6 +267,59 @@ + +
+
+

+ Modell-Reasoning (Thinking) +

+
+
+
+
+

+ Thinking-Token bei kompatiblen Modellen erlauben +

+

+ Aus = günstiger, schneller. An = höhere Antwortqualität bei komplexen Aufgaben. +

+
+
{ + return async ({ update }) => { + await update(); + }; + }} + > + + +
+
+ {#if form?.success && form.action === 'thinking'} +

+ Thinking {form.enabled ? 'aktiviert' : 'deaktiviert'}. +

+ {/if} +
+
+
- {#each [{ label: 'Anfragen heute', value: data.ai.usage.today.calls.toString() }, { label: 'Tokens heute', value: (data.ai.usage.today.input_tokens + data.ai.usage.today.output_tokens).toLocaleString('de-DE') }, { label: 'Kosten heute', value: formatCost(data.ai.usage.today.estimated_cost_usd) }, { label: 'Kosten (30 Tage)', value: formatCost(data.ai.usage.month.estimated_cost_usd) }] as stat} + {#each [{ label: 'Anfragen heute', value: data.ai.usage.today.calls.toString() }, { label: 'Tokens heute', value: (data.ai.usage.today.input_tokens + data.ai.usage.today.output_tokens + (data.ai.usage.today.thinking_tokens ?? 0)).toLocaleString('de-DE') }, { label: 'Kosten heute', value: formatCost(data.ai.usage.today.estimated_cost_usd) }, { label: 'Kosten (30 Tage)', value: formatCost(data.ai.usage.month.estimated_cost_usd) }] as stat}

{stat.label}

@@ -311,7 +365,11 @@ {event.call_type} {event.model} {(event.input_tokens + event.output_tokens).toLocaleString('de-DE')}{( + event.input_tokens + + event.output_tokens + + (event.thinking_tokens ?? 0) + ).toLocaleString('de-DE')} {event.grounded ? '✓' : '—'} {formatCost(event.estimated_cost_usd)}