[update] added gemma2:9b model to example.env, refactored prompt generation with stricter JSON rules, adjusted context size, integrated system prompt for better summaries, and improved error handling in backend services

2025-08-07 15:05:20 +02:00
parent 0a97a57c76
commit 011b256662
3 changed files with 68 additions and 32 deletions
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -8,7 +8,7 @@ MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
 DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
 CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
 SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
-LLM_MODEL = os.getenv("LLM_MODEL", "phi3:3.8b-mini-128k-instruct-q4_0")
+LLM_MODEL = os.getenv("LLM_MODEL", "gemma2:9b")
 LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
 OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
 ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
--- a/backend/app/services.py
+++ b/backend/app/services.py
@@ -124,7 +124,6 @@ class NewsFetcher:

    @staticmethod
    def build_prompt(
-            url: str,
            title: str = "",
            summary: str = "",
            content: str = "") -> str:
@@ -132,14 +131,13 @@ class NewsFetcher:
        Generate a prompt for the LLM to summarize an article.

        Args:
-            url: Public URL of the article to summarize
            title: Article title from RSS feed (optional)
            summary: Article summary from RSS feed (optional)
            content: Extracted article content (optional)

        Returns:
            A formatted prompt string that instructs the LLM to generate
-            a JSON response with title and summaries in German and English
+            a JSON response with title, summary and tags in German
        """
        context_info = []
        if title:
@@ -155,21 +153,35 @@ class NewsFetcher:
            context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."

        return (
-            "### Aufgabe\n"
-            f"Du sollst eine Nachricht basierend auf der URL und den verfügbaren Informationen zusammenfassen.\n"
-            f"URL: {url}\n"
-            f"Verfügbare Informationen:\n{context}\n\n"
+            "### Vorliegende Informationen\n"
+            f"{context}\n\n"
+            "### Längenbegrenzungen\n"
+            "title: max 100 Zeichen\n"
+            "summary: 100–160 Wörter\n"
+            "tags: JSON-Array mit bis zu 6 Schlüsselwörtern, alles Kleinbuchstaben.\n\n"
            "### Regeln\n"
-            "1. Nutze VORRANGIG den Artikel-Inhalt falls verfügbar, ergänze mit RSS-Informationen\n"
-            "2. Falls kein Artikel-Inhalt verfügbar ist, nutze RSS-Titel und -Beschreibung\n"
-            "3. Falls keine ausreichenden Informationen vorliegen, erstelle eine plausible Zusammenfassung basierend auf der URL\n"
-            "4. Gib ausschließlich **gültiges minifiziertes JSON** zurück – kein Markdown, keine Kommentare\n"
-            "5. Struktur: {\"title\":\"…\",\"summary\":\"…\"}\n"
-            "6. title: Aussagekräftiger deutscher Titel (max 100 Zeichen)\n"
-            "7. summary: Deutsche Zusammenfassung (zwischen 100 und 160 Wörter)\n"
-            "8. Kein Text vor oder nach dem JSON\n\n"
+            "1. Nutze ausschließlich Informationen, die im bereitgestellten Material eindeutig vorkommen. Externes Wissen ist untersagt.\n"
+            "2. Liegt sowohl Artikel-Text als auch RSS-Metadaten vor, hat der Artikel-Text Vorrang; verwende RSS nur ergänzend.\n"
+            "3. Liegt nur RSS-Titel und/oder -Beschreibung vor, stütze dich ausschließlich darauf.\n"
+            "4. Sind die Informationen unzureichend, gib exakt {\"title\":\"\",\"summary\":\"\",\"tags\":[]} zurück.\n"
+            "5. Gib nur gültiges, minifiziertes JSON zurück – keine Zeilenumbrüche, kein Markdown, keine Kommentare.\n"
+            "6. Verwende keine hypothetischen Formulierungen (\"könnte\", \"möglicherweise\" etc.).\n"
+            "7. Wörtliche Zitate dürfen höchstens 15 % des Summary-Texts ausmachen.\n"
+            "8. Kein Text vor oder nach dem JSON.\n\n"
            "### Ausgabe\n"
-            "Jetzt antworte mit dem JSON:"
+            "Antworte jetzt ausschließlich mit dem JSON:\n"
+        )
+
+    @staticmethod
+    def build_system_prompt():
+        return (
+            "Du bist ein hochpräziser JSON-Summarizer und Experte für die Zusammenfassung von Artikeln.\n\n"
+            "### Vorgehen\n"
+            "Schritt 1: Identifiziere Hauptthema und Zweck.\n"
+            "Schritt 2: Extrahiere die wichtigsten Fakten und Ergebnisse.\n"
+            "Schritt 3: Erkenne die zentralen Argumente und Standpunkte.\n"
+            "Schritt 4: Ordne die Informationen nach Wichtigkeit.\n"
+            "Schritt 5: Erstelle eine prägnante, klare und sachliche Zusammenfassung.\n\n"
        )

    @staticmethod
@@ -199,17 +211,39 @@ class NewsFetcher:
            logger.warning(
                f"⚠️ Could not fetch article content, using RSS data only")

-        prompt = NewsFetcher.build_prompt(
-            url, title, summary, article_content)
+        prompt = NewsFetcher.build_prompt(title, summary, article_content)
+        system_prompt = NewsFetcher.build_system_prompt()
        payload = {
            "model": LLM_MODEL,
            "prompt": prompt,
+            "system": system_prompt,
            "stream": False,
            "temperature": 0.1,
-            "format": "json",
+            "format": {
+                "type": "object",
+                "properties": {
+                    "title": {
+                        "type": "string"
+                    },
+                    "summary": {
+                        "type": "string"
+                    },
+                    "tags": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "required": [
+                    "title",
+                    "summary",
+                    "tags"
+                ]
+            },
            "options": {
                "num_gpu": 1,  # Force GPU usage
-                "num_ctx": 64_000,  # Context size
+                "num_ctx": 8192,  # Context size
            }
        }

@@ -403,7 +437,8 @@ class NewsFetcher:
                        cursor.execute(
                            """
                            INSERT
-                            OR IGNORE INTO news
+                                OR IGNORE
+                            INTO news
                                (title, summary, url, published, country)
                            VALUES (?, ?, ?, ?, ?)
                            """,
--- a/backend/example.env
+++ b/backend/example.env
@@ -16,6 +16,7 @@ LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0 # ca 6-8GB (langer kontext)
 LLM_MODEL=mistral-nemo:12b # ca 16-24+GB
 LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S # ca 22GB
 LLM_MODEL=yarn-mistral:7b-64k-q4_K_M # ca 11GB
+LLM_MODEL=gemma2:9b # ca 8GB

 # Timeout in seconds for LLM requests
 LLM_TIMEOUT_SECONDS=180