From 93c06d10eecac757f041d9352abb9b02c9c1b1cd Mon Sep 17 00:00:00 2001
From: vikingowl
Date: Sat, 2 Aug 2025 23:15:26 +0200
Subject: [PATCH] updated LLM configurations and added support for new models

---
 backend/app/config.py   | 2 +-
 backend/app/services.py | 6 +++++-
 backend/example.env     | 4 +++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/backend/app/config.py b/backend/app/config.py
index 2189c21..6d0f3ab 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -8,7 +8,7 @@ MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
 DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
 CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
 SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
-LLM_MODEL = os.getenv("LLM_MODEL", "qwen2:7b-instruct-q4_K_M")
+LLM_MODEL = os.getenv("LLM_MODEL", "mistral-nemo:12b")
 LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
 OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
 ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
diff --git a/backend/app/services.py b/backend/app/services.py
index 4e2f7f0..2cea636 100644
--- a/backend/app/services.py
+++ b/backend/app/services.py
@@ -206,7 +206,11 @@ class NewsFetcher:
             "prompt": prompt,
             "stream": False,
             "temperature": 0.1,
-            "format": "json"
+            "format": "json",
+            "options": {
+                "num_gpu": 1,  # number of model layers to offload to the GPU
+                "num_ctx": 8192,  # context window size in tokens
+            }
         }
 
         try:
diff --git a/backend/example.env b/backend/example.env
index a4b1ec0..56a74f9 100644
--- a/backend/example.env
+++ b/backend/example.env
@@ -12,6 +12,8 @@ SYNC_COOLDOWN_MINUTES=30
 
 # LLM model to use for summarization
-LLM_MODEL=qwen2:7b-instruct-q4_K_M
+#LLM_MODEL=qwen2:7b-instruct-q4_K_M
+#LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
+LLM_MODEL=mistral-nemo:12b
 
 # Timeout in seconds for LLM requests
 LLM_TIMEOUT_SECONDS=180
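
A note on the services.py hunk above, with a sketch for reference. In Ollama's /api/generate API, sampler settings such as temperature are read from the "options" object, so the pre-existing top-level "temperature": 0.1 field is most likely ignored; moving it into the new "options" block would make it take effect. A minimal sketch of the resulting call, assuming the default Ollama host and this repo's config constants (the URL, import path, function name, and error handling are illustrative assumptions, not code from this patch):

import requests

from app.config import LLM_MODEL, LLM_TIMEOUT_SECONDS  # assumed import path

OLLAMA_GENERATE_URL = "http://localhost:11434/api/generate"  # assumed default host/port

def generate_summary(prompt: str) -> str:
    # Non-streaming generate call mirroring the payload built in services.py.
    payload = {
        "model": LLM_MODEL,
        "prompt": prompt,
        "stream": False,
        "format": "json",
        "options": {
            "temperature": 0.1,  # sampler settings belong inside "options"
            "num_gpu": 1,        # number of model layers to offload to the GPU
            "num_ctx": 8192,     # context window size in tokens
        },
    }
    response = requests.post(OLLAMA_GENERATE_URL, json=payload, timeout=LLM_TIMEOUT_SECONDS)
    response.raise_for_status()
    # Ollama's non-streaming reply carries the generated text under "response".
    return response.json()["response"]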
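
Two trade-offs behind the new options, worth double-checking against the Ollama release in use: "num_ctx": 8192 enlarges the context window (the default has historically been 2048 tokens), and KV-cache memory grows roughly in proportion, which is presumably why the smaller phi3 mini variant is kept as an alternative in example.env; and "num_gpu" counts offloaded layers rather than GPUs, so a value of 1 toggles Metal on macOS but leaves most of a 12B model on the CPU on CUDA systems, where a larger value (or leaving it unset for auto-detection) offloads more layers.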