Update LLM configuration: default to mistral-nemo:12b, send Ollama GPU/context options, and list alternative models in example.env

2025-08-02 23:15:26 +02:00
parent d92ad3eb65
commit 93c06d10ee
3 changed files with 8 additions and 2 deletions
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -8,7 +8,7 @@ MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
 DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
 CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
 SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
-LLM_MODEL = os.getenv("LLM_MODEL", "qwen2:7b-instruct-q4_K_M")
+LLM_MODEL = os.getenv("LLM_MODEL", "mistral-nemo:12b")
 LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
 OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
 ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
--- a/backend/app/services.py
+++ b/backend/app/services.py
@@ -206,7 +206,11 @@ class NewsFetcher:
            "prompt": prompt,
            "stream": False,
            "temperature": 0.1,
-            "format": "json"
+            "format": "json",
+            "options": {
+                "num_gpu": 1,  # Number of layers to offload to the GPU (not an on/off flag) - TODO confirm 1 is intended
+                "num_ctx": 8192,  # Context window size in tokens
+            }
        }

        try:
--- a/backend/example.env
+++ b/backend/example.env
@@ -12,6 +12,8 @@ SYNC_COOLDOWN_MINUTES=30

 # LLM model to use for summarization
 LLM_MODEL=qwen2:7b-instruct-q4_K_M
+LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
+LLM_MODEL=mistral-nemo:12b

 # Timeout in seconds for LLM requests
 LLM_TIMEOUT_SECONDS=180