enhance: improve news fetching, processing, and logging architecture
@@ -14,8 +14,10 @@ import os
import sqlite3
from contextlib import contextmanager
from datetime import datetime, timezone, timedelta
from http.client import HTTPException
from pathlib import Path
from typing import Dict, List, Optional, Any, Union, Iterator, Tuple, TypedDict, cast
import logging

import feedparser
import httpx
@@ -27,7 +29,7 @@ from pydantic import BaseModel
# Constants
DB_PATH = Path("owlynews.sqlite")
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
MIN_CRON_HOURS = 0.5
MIN_CRON_HOURS = 0.1
DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
SYNC_COOLDOWN_MINUTES = 30
@@ -35,6 +37,13 @@ LLM_MODEL = "qwen2:7b-instruct-q4_K_M"
LLM_TIMEOUT_SECONDS = 180
OLLAMA_API_TIMEOUT_SECONDS = 10

# Add logging configuration at the top of your file
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# FastAPI app initialization
app = FastAPI(
title="Owly News Summariser",
@@ -46,20 +55,19 @@ app = FastAPI(
SCHEMA_SQL = [
"""
CREATE TABLE IF NOT EXISTS news (
id TEXT PRIMARY KEY, -- e.g. URL as unique identifier
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
summary_de TEXT,
summary_en TEXT,
published INTEGER, -- Unix epoch (UTC); use TEXT ISO-8601 if you prefer
source TEXT,
country TEXT,
source_feed TEXT
description TEXT,
url TEXT NOT NULL,
published TEXT NOT NULL,
country TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""",
"CREATE INDEX IF NOT EXISTS idx_news_published ON news(published)",
"""
CREATE TABLE IF NOT EXISTS feeds (
id INTEGER PRIMARY KEY, -- auto-increment via rowid
id INTEGER PRIMARY KEY,
country TEXT,
url TEXT UNIQUE NOT NULL
)
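The hunk above interleaves the old and new column lists for the `news` table. Read together with the INSERT and SELECT statements later in this diff, the new shape appears to be the one sketched below; treat it as a reading of the diff, not an authoritative schema dump.

```python
# Reading of the reworked `news` table (the old summary_de/summary_en/source/source_feed
# columns drop out); column names match the INSERT/SELECT statements later in this diff,
# the exact types are inferred from the hunk above.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("""
    CREATE TABLE IF NOT EXISTS news (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        description TEXT,
        url TEXT NOT NULL,
        published TEXT NOT NULL,
        country TEXT NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
""")
conn.execute(
    "INSERT INTO news (title, description, url, published, country) VALUES (?, ?, ?, ?, ?)",
    ("Example headline", "Example teaser", "https://example.org/article", "1700000000", "de"),
)
print(conn.execute("SELECT id, title, created_at FROM news").fetchone())
conn.close()
```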
@@ -94,23 +102,23 @@ class DatabaseManager:
db_path: Path to the SQLite database file
"""
self.db_path = db_path
self._connection = None
self._initialize_db()

def _get_connection(self) -> sqlite3.Connection:
"""
Get or create a database connection.
Create a thread-safe database connection.

Returns:
An active SQLite connection
"""
if self._connection is None:
self._connection = sqlite3.connect(
self.db_path,
check_same_thread=False
)
self._connection.row_factory = sqlite3.Row
return self._connection
conn = sqlite3.connect(
self.db_path,
check_same_thread=False, # Allow use across threads
timeout=20.0 # Add timeout to prevent deadlocks
)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
return conn
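The replacement connection helper switches the database to WAL journaling and adds a busy timeout so the scheduler and API handlers are less likely to block each other. A small self-contained check of that pragma, using a temporary file because purely in-memory databases cannot switch to WAL:

```python
# Standalone check of the connection settings used in _get_connection above.
import sqlite3
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    conn = sqlite3.connect(Path(tmp) / "demo.sqlite", check_same_thread=False, timeout=20.0)
    conn.row_factory = sqlite3.Row
    mode = conn.execute("PRAGMA journal_mode=WAL").fetchone()[0]
    print(mode)  # prints "wal" for file-backed databases
    conn.close()
```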

@contextmanager
def get_cursor(self) -> Iterator[sqlite3.Cursor]:
@@ -119,70 +127,138 @@ class DatabaseManager:

Yields:
A database cursor for executing SQL statements

Example:
```python
with db_manager.get_cursor() as cursor:
cursor.execute("SELECT * FROM table")
results = cursor.fetchall()
```
"""
conn = self._get_connection()
cursor = conn.cursor()
conn = None
try:
conn = self._get_connection()
cursor = conn.cursor()
yield cursor
conn.commit()
except Exception:
conn.rollback()
raise
except Exception as e:
if conn:
conn.rollback()
raise e
finally:
if conn:
conn.close()
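As rewritten, get_cursor opens a connection per use, commits on success, rolls back on any exception, and always closes the connection. A stripped-down sketch of that pattern outside the class, against an in-memory database:

```python
# Minimal sketch of the commit/rollback/close discipline behind get_cursor.
import sqlite3
from contextlib import contextmanager
from typing import Iterator

@contextmanager
def cursor_for(db_path: str) -> Iterator[sqlite3.Cursor]:
    conn = sqlite3.connect(db_path, check_same_thread=False, timeout=20.0)
    conn.row_factory = sqlite3.Row
    try:
        yield conn.cursor()
        conn.commit()           # only reached when the with-block raised nothing
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()

with cursor_for(":memory:") as cur:
    cur.execute("CREATE TABLE t (x INTEGER)")
    cur.execute("INSERT INTO t VALUES (1)")
```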

def _initialize_db(self) -> None:
"""
Initialize the database schema and default settings.
Creates tables if they don't exist and inserts default values.
"""
logger.info("🗄️ Initializing database...")

# Create schema
with self.get_cursor() as cursor:
for stmt in SCHEMA_SQL:
for i, stmt in enumerate(SCHEMA_SQL):
logger.debug(f"📝 Executing schema statement {i+1}/{len(SCHEMA_SQL)}")
cursor.execute(stmt)

# Add migration for description column if it doesn't exist
try:
cursor.execute("SELECT description FROM news LIMIT 1")
logger.debug("✅ Description column exists")
except sqlite3.OperationalError:
# Column doesn't exist, add it
logger.info("🔧 Adding missing description column to news table...")
cursor.execute("ALTER TABLE news ADD COLUMN description TEXT")

# Insert initial settings
cursor.execute(
"INSERT INTO settings VALUES (?, ?) ON CONFLICT (key) DO NOTHING",
("cron_hours", str(CRON_HOURS))
)
logger.debug("⚙️ Settings initialized")

# Insert initial metadata
cursor.execute(
"INSERT INTO meta VALUES (?, ?) ON CONFLICT (key) DO NOTHING",
("last_sync", "0")
)
logger.debug("📊 Metadata initialized")

# Check current feed count
cursor.execute("SELECT COUNT(*) as count FROM feeds")
feed_count = cursor.fetchone()["count"]
logger.info(f"📡 Current feeds in database: {feed_count}")

# Seed feeds if none exist
cursor.execute("SELECT COUNT(*) as count FROM feeds")
if cursor.fetchone()["count"] == 0:
self._seed_feeds()
if feed_count == 0:
logger.info("🌱 No feeds found, starting seeding process...")
feeds_added = self._seed_feeds(cursor) # Pass the existing cursor

def _seed_feeds(self) -> None:
# Verify seeding worked
cursor.execute("SELECT COUNT(*) as count FROM feeds")
new_feed_count = cursor.fetchone()["count"]
logger.info(f"📡 Feeds after seeding: {new_feed_count}")
else:
logger.info("📡 Feeds already exist, skipping seeding")

logger.info("✅ Database initialization complete")

def _seed_feeds(self, cursor: sqlite3.Cursor) -> int:
"""
Seed the database with initial feeds from the seed_feeds.json file.
Only runs if the feeds table is empty.

Args:
cursor: Database cursor to use for operations

Returns:
Number of feeds added
"""
logger.info("🌱 Seeding feeds from seed_feeds.json...")
feeds_added = 0

try:
seed_path = Path(__file__).with_name("seed_feeds.json")
logger.debug(f"📁 Looking for seed file at: {seed_path}")

if not seed_path.exists():
logger.error(f"❌ Seed file not found at: {seed_path}")
return feeds_added

with open(seed_path, "r") as f:
seed_data = json.load(f)

with self.get_cursor() as cursor:
for country, urls in seed_data.items():
for url in urls:
logger.debug(f"📄 Loaded seed data: {seed_data}")

for country, urls in seed_data.items():
logger.info(f"🌍 Processing {len(urls)} feeds for country: {country}")
for url in urls:
try:
cursor.execute(
"INSERT INTO feeds (country, url) VALUES (?, ?) "
"ON CONFLICT (url) DO NOTHING",
(country, url)
)
except (FileNotFoundError, json.JSONDecodeError) as e:
print(f"Error seeding feeds: {e}")
# Check if the insert actually added a row
if cursor.rowcount > 0:
feeds_added += 1
logger.debug(f"✅ Added feed: {url} ({country})")
else:
logger.debug(f"⏩ Feed already exists: {url} ({country})")
except Exception as e:
logger.error(f"❌ Failed to add feed {url}: {e}")

logger.info(f"🌱 Seeding complete: {feeds_added} feeds added")

except json.JSONDecodeError as e:
logger.error(f"❌ Invalid JSON in seed_feeds.json: {e}")
# Re-read file content for error reporting
try:
with open(seed_path, "r") as f:
content = f.read()
logger.error(f"📄 File content causing error: {content}")
except:
logger.error("📄 Could not re-read file for error reporting")
except FileNotFoundError as e:
logger.error(f"❌ Seed file not found: {e}")
except Exception as e:
logger.error(f"❌ Error seeding feeds: {e}")

return feeds_added
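_seed_feeds iterates `seed_data.items()` and then each URL, so seed_feeds.json is expected to map a country code to a list of feed URLs. A minimal sketch of that shape and the rowcount-based counting, with placeholder URLs and an in-memory database:

```python
# Sketch of the expected seed_feeds.json shape and the ON CONFLICT counting loop.
import sqlite3

seed_data = {                       # placeholder URLs, not the shipped seed file
    "de": ["https://example.org/de/rss"],
    "us": ["https://example.org/us/rss", "https://example.org/us/rss"],  # duplicate on purpose
}

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE feeds (id INTEGER PRIMARY KEY, country TEXT, url TEXT UNIQUE NOT NULL)")
cursor = conn.cursor()
feeds_added = 0
for country, urls in seed_data.items():
    for url in urls:
        cursor.execute(
            "INSERT INTO feeds (country, url) VALUES (?, ?) ON CONFLICT (url) DO NOTHING",
            (country, url),
        )
        if cursor.rowcount > 0:     # 0 when the URL already existed
            feeds_added += 1
print(f"{feeds_added} feeds added")  # 2: the duplicate URL is skipped
conn.commit()
conn.close()
```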


# Initialize database manager
@@ -203,41 +279,51 @@ class NewsFetcher:
"""

@staticmethod
def build_prompt(url: str) -> str:
def build_prompt(url: str, title: str = "", description: str = "") -> str:
"""
Generate a prompt for the LLM to summarize an article.

Args:
url: Public URL of the article to summarize
title: Article title from RSS feed (optional)
description: Article description from RSS feed (optional)

Returns:
A formatted prompt string that instructs the LLM to generate
a JSON response with title and summaries in German and English

Note:
LLMs like qwen2 don't have native web access; the model will
generate summaries based on its training data and the URL.
"""
context_info = []
if title:
context_info.append(f"Titel: {title}")
if description:
context_info.append(f"Beschreibung: {description}")

context = "\n".join(context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."

return (
"### Aufgabe\n"
f"Du bekommst eine öffentliche URL: {url}\n"
f"Du sollst eine Nachricht basierend auf der URL und den verfügbaren Informationen zusammenfassen.\n"
f"URL: {url}\n"
f"Verfügbare Informationen:\n{context}\n\n"
"### Regeln\n"
"1. **Entnimm den Inhalt nicht automatisch.** "
"Falls dir der Text nicht vorliegt, antworte mit leeren Strings.\n"
"2. Gib ausschließlich **gültiges minifiziertes JSON** zurück – "
"kein Markdown, keine Kommentare.\n"
"3. Struktur:\n"
"{\"title\":\"…\",\"summary_de\":\"…\",\"summary_en\":\"…\"}\n"
"4. summary_de ≤ 160 Wörter, summary_en ≤ 160 Wörter. Zähle selbst.\n"
"5. Kein Text vor oder nach dem JSON.\n"
"1. Nutze die verfügbaren Informationen (Titel, Beschreibung) und dein Wissen über die URL-Domain\n"
"2. Falls keine ausreichenden Informationen vorliegen, erstelle eine plausible Zusammenfassung basierend auf der URL\n"
"3. Gib ausschließlich **gültiges minifiziertes JSON** zurück – kein Markdown, keine Kommentare\n"
"4. Struktur: {\"title\":\"…\",\"summary_de\":\"…\",\"summary_en\":\"…\"}\n"
"5. title: Aussagekräftiger deutscher Titel (max 100 Zeichen)\n"
"6. summary_de: Deutsche Zusammenfassung (max 160 Wörter)\n"
"7. summary_en: Englische Zusammenfassung (max 160 Wörter)\n"
"8. Kein Text vor oder nach dem JSON\n\n"
"### Ausgabe\n"
"Jetzt antworte."
"Jetzt antworte mit dem JSON:"
)

@staticmethod
async def summarize_article(
client: httpx.AsyncClient,
url: str
url: str,
title: str = "",
description: str = ""
) -> Optional[ArticleSummary]:
"""
Generate a summary of an article using the LLM.
@@ -245,31 +331,85 @@ class NewsFetcher:
Args:
client: An active httpx AsyncClient for making requests
url: URL of the article to summarize
title: Article title from RSS feed
description: Article description from RSS feed

Returns:
A dictionary containing the article title and summaries in German and English,
or None if summarization failed
"""
prompt = NewsFetcher.build_prompt(url)
logger.info(f"🤖 Starting article summarization for: {url}")
logger.debug(f"📝 RSS Title: {title[:50]}..." if title else "📝 No RSS title")
logger.debug(f"📄 RSS Description: {description[:100]}..." if description else "📄 No RSS description")

prompt = NewsFetcher.build_prompt(url, title, description)
payload = {
"model": LLM_MODEL,
"prompt": prompt,
"stream": False,
"temperature": 0.2,
"temperature": 0.3, # Slightly increase creativity
"format": "json"
}

try:
logger.debug(f"📤 Sending request to Ollama API with model: {LLM_MODEL}")
start_time = datetime.now()

response = await client.post(
f"{OLLAMA_HOST}/api/generate",
json=payload,
timeout=LLM_TIMEOUT_SECONDS
)

elapsed_time = (datetime.now() - start_time).total_seconds()
logger.info(f"⏱️ Ollama API response received in {elapsed_time:.2f}s")

response.raise_for_status()
result = response.json()
return cast(ArticleSummary, result["response"])
except (KeyError, ValueError, httpx.HTTPError, json.JSONDecodeError) as e:
print(f"Error summarizing article {url}: {e}")

logger.debug(f"📥 Raw Ollama response keys: {list(result.keys())}")

# Parse the JSON string returned by the LLM
llm_response = result["response"]
logger.debug(f"🔍 LLM response type: {type(llm_response)}")
logger.debug(f"🔍 LLM response preview: {str(llm_response)[:200]}...")

if isinstance(llm_response, str):
logger.debug("📋 Parsing JSON string response")
summary_data = json.loads(llm_response)
else:
logger.debug("📋 Using direct dict response")
summary_data = llm_response

# Validate required fields
required_fields = ["title", "summary_de", "summary_en"]
missing_fields = [field for field in required_fields if field not in summary_data]

if missing_fields:
logger.warning(f"⚠️ Missing required fields in summary: {missing_fields}")
return None

# Log summary quality metrics
title_len = len(summary_data.get("title", ""))
de_words = len(summary_data.get("summary_de", "").split())
en_words = len(summary_data.get("summary_en", "").split())

logger.info(f"✅ Summary generated - Title: {title_len} chars, DE: {de_words} words, EN: {en_words} words")

if de_words > 160 or en_words > 160:
logger.warning(f"⚠️ Summary exceeds word limit - DE: {de_words}/160, EN: {en_words}/160")

return cast(ArticleSummary, summary_data)

except json.JSONDecodeError as e:
logger.error(f"❌ JSON parsing error for {url}: {e}")
logger.error(f"🔍 Raw response that failed to parse: {llm_response[:500]}...")
return None
except httpx.HTTPError as e:
logger.error(f"❌ HTTP error for {url}: {e}")
return None
except Exception as e:
logger.error(f"❌ Unexpected error summarizing {url}: {type(e).__name__}: {e}")
return None
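For reference, a trimmed standalone version of the Ollama call that summarize_article builds: with `"format": "json"` and `"stream": False`, `/api/generate` returns one JSON document whose `response` field is itself a JSON string that still has to be parsed, which is exactly the str-vs-dict branch above. Model name and host mirror the constants at the top of this diff; running it requires a local Ollama server with that model pulled.

```python
# Standalone sketch of the /api/generate call made in summarize_article.
import asyncio
import json
import httpx

async def ollama_json(prompt: str) -> dict:
    payload = {
        "model": "qwen2:7b-instruct-q4_K_M",   # LLM_MODEL above
        "prompt": prompt,
        "stream": False,
        "format": "json",
    }
    async with httpx.AsyncClient() as client:
        resp = await client.post("http://localhost:11434/api/generate",
                                 json=payload, timeout=180)
        resp.raise_for_status()
        raw = resp.json()["response"]          # JSON delivered as a string
        return raw if isinstance(raw, dict) else json.loads(raw)

# Uncomment with a running Ollama instance:
# print(asyncio.run(ollama_json('Antworte nur mit {"title":"","summary_de":"","summary_en":""}')))
```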

@staticmethod
@@ -278,16 +418,35 @@ class NewsFetcher:
Fetch articles from all feeds and store summaries in the database.
This is the main function that runs periodically to update the news database.
"""
logger.info("🚀 Starting scheduled news harvest...")
harvest_start_time = datetime.now()

total_feeds = 0
total_articles = 0
successful_articles = 0
failed_articles = 0

try:
# Get all feeds from the database
with db_manager.get_cursor() as cursor:
cursor.execute("SELECT country, url FROM feeds")
feeds = cursor.fetchall()
total_feeds = len(feeds)

logger.info(f"📡 Found {total_feeds} feeds to process")

# Process each feed
async with httpx.AsyncClient() as client:
for feed_row in feeds:
await NewsFetcher._process_feed(client, feed_row)
for i, feed_row in enumerate(feeds, 1):
logger.info(f"📰 Processing feed {i}/{total_feeds}: {feed_row['url']} ({feed_row['country']})")

feed_stats = await NewsFetcher._process_feed(client, feed_row)

total_articles += feed_stats['total']
successful_articles += feed_stats['successful']
failed_articles += feed_stats['failed']

logger.info(f"📊 Feed {i} complete: {feed_stats['successful']}/{feed_stats['total']} articles processed successfully")

# Update last sync timestamp
current_time = int(datetime.now(timezone.utc).timestamp())
@@ -296,30 +455,66 @@ class NewsFetcher:
"UPDATE meta SET val=? WHERE key='last_sync'",
(str(current_time),)
)

harvest_duration = (datetime.now() - harvest_start_time).total_seconds()

logger.info(f"✅ News harvest completed in {harvest_duration:.2f}s")
logger.info(f"📊 Final stats: {total_feeds} feeds, {successful_articles}/{total_articles} articles processed successfully")

except Exception as e:
print(f"Error harvesting feeds: {e}")
logger.error(f"❌ Critical error during harvest: {type(e).__name__}: {e}")
raise

@staticmethod
async def _process_feed(
client: httpx.AsyncClient,
feed_row: sqlite3.Row
) -> None:
) -> Dict[str, int]:
"""
Process a single feed, fetching and summarizing all articles.

Args:
client: An active httpx AsyncClient for making requests
feed_row: A database row containing feed information

Returns:
Dictionary with processing statistics
"""
stats = {'total': 0, 'successful': 0, 'failed': 0, 'skipped': 0}

try:
logger.debug(f"🔍 Parsing RSS feed: {feed_row['url']}")
feed_data = feedparser.parse(feed_row["url"])

for entry in feed_data.entries:
if hasattr(feed_data, 'bozo') and feed_data.bozo:
logger.warning(f"⚠️ Feed has parsing issues: {feed_row['url']}")
if hasattr(feed_data, 'bozo_exception'):
logger.warning(f"⚠️ Feed exception: {feed_data.bozo_exception}")

total_entries = len(feed_data.entries)
logger.info(f"📄 Found {total_entries} entries in feed")

if total_entries == 0:
logger.warning(f"⚠️ No entries found in feed: {feed_row['url']}")
return stats

for i, entry in enumerate(feed_data.entries, 1):
stats['total'] += 1
logger.debug(f"📝 Processing article {i}/{total_entries}")

# Skip entries without links or published dates
if not hasattr(entry, "link") or not hasattr(entry, "published_parsed"):
if not hasattr(entry, "link"):
logger.debug(f"⏩ Skipping entry {i}: no link")
stats['skipped'] += 1
continue

article_id = entry.link
if not hasattr(entry, "published_parsed"):
logger.debug(f"⏩ Skipping entry {i}: no published date") # TODO: change back to 0.5
stats['skipped'] += 1
continue

article_url = entry.link
logger.debug(f"🔗 Processing article: {article_url}")

# Parse the published date
try:
@@ -327,39 +522,80 @@ class NewsFetcher:
*entry.published_parsed[:6],
tzinfo=timezone.utc
)
except (TypeError, ValueError):
# Skip entries with invalid dates
logger.debug(f"📅 Article published: {published}")
except (TypeError, ValueError) as e:
logger.debug(f"⏩ Skipping entry {i}: invalid date - {e}")
stats['skipped'] += 1
continue

# Check if article already exists
with db_manager.get_cursor() as cursor:
cursor.execute("SELECT id FROM news WHERE url = ?", (article_url,))
if cursor.fetchone():
logger.debug(f"⏩ Skipping entry {i}: article already exists")
stats['skipped'] += 1
continue

# Get article summary
summary = await NewsFetcher.summarize_article(client, entry.link)
logger.debug(f"🤖 Requesting summary for article {i}")

# Extract title and description from RSS entry
rss_title = getattr(entry, 'title', '')
rss_description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')

summary = await NewsFetcher.summarize_article(
client,
article_url,
title=rss_title,
description=rss_description
)

if not summary:
logger.warning(f"❌ Failed to get summary for article {i}: {article_url}")
stats['failed'] += 1
continue

published_timestamp = int(published.timestamp())

# Handle source field - it can be a string or dict
source_value = entry.get("source", feed_row["url"])
if isinstance(source_value, dict):
source_title = source_value.get("title", feed_row["url"])
else:
source_title = source_value if source_value else feed_row["url"]

logger.debug(f"💾 Storing article in database")

# Store in database
with db_manager.get_cursor() as cursor:
cursor.execute(
"""
INSERT INTO news (
id, title, summary_de, summary_en, published,
source, country, source_feed
try:
with db_manager.get_cursor() as cursor:
cursor.execute(
"""
INSERT INTO news (title, description, url, published, country)
VALUES (?, ?, ?, ?, ?)
""",
(
summary["title"],
summary["summary_de"],
article_url,
published_timestamp,
feed_row["country"],
)
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT (id) DO NOTHING
""",
(
article_id,
summary["title"],
summary["summary_de"],
summary["summary_en"],
published.isoformat(),
entry.get("source", {}).get("title", feed_row["url"]),
feed_row["country"],
feed_row["url"],
)
)

logger.info(f"✅ Successfully processed article {i}: {summary['title'][:50]}...")
stats['successful'] += 1

except Exception as db_error:
logger.error(f"❌ Database error for article {i}: {db_error}")
stats['failed'] += 1
continue

except Exception as e:
print(f"Error processing feed {feed_row['url']}: {e}")
logger.error(f"❌ Error processing feed {feed_row['url']}: {type(e).__name__}: {e}")

logger.info(f"📊 Feed processing complete: {stats['successful']} successful, {stats['failed']} failed, {stats['skipped']} skipped out of {stats['total']} total")
return stats
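Most of the skip logic above leans on feedparser conventions: the bozo flag for malformed feeds, published_parsed as a UTC struct_time, and a source field that may be a plain string or a dict. A compact sketch of those idioms, runnable against any RSS URL:

```python
# Sketch of the feedparser idioms _process_feed relies on.
from datetime import datetime, timezone
import feedparser

def inspect_feed(url: str) -> None:
    feed = feedparser.parse(url)
    if feed.bozo:                      # parser hit a problem, entries may still be usable
        print(f"parse warning: {feed.get('bozo_exception')}")
    for entry in feed.entries:
        if not hasattr(entry, "link") or not hasattr(entry, "published_parsed"):
            continue                   # same skip rules as above
        published = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
        source = entry.get("source", url)
        source_title = source.get("title", url) if isinstance(source, dict) else source
        print(entry.link, published.isoformat(), source_title)

# inspect_feed("https://example.org/feed.xml")  # placeholder URL
```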


# Initialize scheduler
@@ -370,7 +606,11 @@ scheduler.add_job(
hours=CRON_HOURS,
id="harvest"
)
print(f"Starting scheduler with {CRON_HOURS} hours interval")
scheduler.start()
print("Scheduler started")
print(f"Next run: {scheduler.get_job('harvest').next_run_time}")



# Pydantic models for API requests and responses
@@ -414,7 +654,7 @@ class HoursResponse(BaseModel):


# Dependency for getting a database cursor
def get_db():
async def get_db():
"""
Dependency that provides a database cursor.

@@ -445,14 +685,20 @@ async def get_news(
Returns:
List of news articles matching the criteria
"""
db.execute(
"""
SELECT * FROM news
WHERE country=? AND published BETWEEN ? AND ?
ORDER BY published DESC
""",
(country, from_, to)
)
try:
datetime.fromisoformat(from_)
datetime.fromisoformat(to)
except ValueError:
raise HTTPException(400, "Invalid date format")
finally:
db.execute(
"""
SELECT id, title, description, url, published, country, created_at FROM news
WHERE country=? AND published BETWEEN ? AND ?
ORDER BY published DESC
""",
(country, from_, to)
)
return [dict(row) for row in db.fetchall()]
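get_news now validates the range parameters before touching the database. One note for the reader: the only HTTPException import visible in this diff comes from http.client, which FastAPI would treat as an ordinary exception (a 500), so the handler presumably intends fastapi.HTTPException. A standalone sketch of the validation step under that assumption:

```python
# Sketch of the ISO-8601 validation used in get_news, assuming FastAPI's HTTPException.
from datetime import datetime
from fastapi import HTTPException

def validate_iso_range(from_: str, to: str) -> None:
    try:
        datetime.fromisoformat(from_)
        datetime.fromisoformat(to)
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid date format")

validate_iso_range("2024-01-01T00:00:00", "2024-01-02T00:00:00")  # passes silently
```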


@@ -622,9 +868,11 @@ async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
)

# Trigger sync in background
asyncio.create_task(NewsFetcher.harvest_feeds())
return {"status": "triggered"}

try:
task = asyncio.create_task(NewsFetcher.harvest_feeds())
return {"status": "triggered", "task_id": id(task)}
except Exception as e:
raise HTTPException(500, f"Failed to trigger sync: {str(e)}")
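asyncio.create_task returns immediately, so the endpoint responds while the harvest keeps running; the returned id(task) is only an opaque marker. A hedged sketch of the same fire-and-forget pattern in plain asyncio, keeping a reference so the pending task cannot be garbage-collected:

```python
# Minimal fire-and-forget sketch, independent of FastAPI; the set holds strong
# references so pending tasks are not garbage-collected mid-flight.
import asyncio

background_tasks: set[asyncio.Task] = set()

async def long_job() -> None:
    await asyncio.sleep(0.1)
    print("harvest finished")

async def trigger() -> dict:
    task = asyncio.create_task(long_job())
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)
    return {"status": "triggered", "task_id": id(task)}

async def main() -> None:
    print(await trigger())
    await asyncio.gather(*background_tasks)   # only so the demo waits before exiting

asyncio.run(main())
```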

# Mount static frontend
frontend_path = os.path.join(