"""
Owly News Summariser Backend

This module provides a FastAPI application that serves as the backend for the Owly News Summariser.
It handles fetching news from RSS feeds, summarizing articles using Ollama/qwen, and providing
an API for the frontend to access the summarized news.

The application uses SQLite for data storage and APScheduler for scheduling periodic news harvesting.
"""

# Standard library imports
import asyncio
import json
import logging
import os
import re
import sqlite3
import time
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Tuple, TypedDict, Union, cast

# Third-party imports
import feedparser
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from bs4 import BeautifulSoup
from fastapi import Depends, FastAPI, HTTPException, Response, status
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

DB_PATH = Path(os.getenv("DB_NAME", "owlynews.sqlite3"))
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
LLM_MODEL = os.getenv("LLM_MODEL", "qwen2:7b-instruct-q4_K_M")
LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 5000))


def update_constants_from_db(settings_dict):
    """
    Update global constants with values from the database settings.
    Environment variables take precedence over database settings.

    Args:
        settings_dict: Dictionary of settings from the database
    """
    global OLLAMA_HOST, MIN_CRON_HOURS, CRON_HOURS, SYNC_COOLDOWN_MINUTES
    global LLM_MODEL, LLM_TIMEOUT_SECONDS, OLLAMA_API_TIMEOUT_SECONDS
    global ARTICLE_FETCH_TIMEOUT, MAX_ARTICLE_LENGTH

    if 'ollama_host' in settings_dict and os.getenv("OLLAMA_HOST") is None:
        OLLAMA_HOST = settings_dict['ollama_host']

    if 'min_cron_hours' in settings_dict and os.getenv("MIN_CRON_HOURS") is None:
        try:
            MIN_CRON_HOURS = float(settings_dict['min_cron_hours'])
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid min_cron_hours value in DB: "
                f"{settings_dict['min_cron_hours']}"
            )

    if 'cron_hours' in settings_dict and os.getenv("CRON_HOURS") is None:
        try:
            cron_hours_value = float(settings_dict['cron_hours'])
            CRON_HOURS = max(MIN_CRON_HOURS, cron_hours_value)
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid cron_hours value in DB: "
                f"{settings_dict['cron_hours']}"
            )

    if 'sync_cooldown_minutes' in settings_dict and os.getenv("SYNC_COOLDOWN_MINUTES") is None:
        try:
            SYNC_COOLDOWN_MINUTES = int(settings_dict['sync_cooldown_minutes'])
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid sync_cooldown_minutes value in DB: "
                f"{settings_dict['sync_cooldown_minutes']}"
            )

    if 'llm_model' in settings_dict and os.getenv("LLM_MODEL") is None:
        LLM_MODEL = settings_dict['llm_model']

    if 'llm_timeout_seconds' in settings_dict and os.getenv("LLM_TIMEOUT_SECONDS") is None:
        try:
            LLM_TIMEOUT_SECONDS = int(settings_dict['llm_timeout_seconds'])
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid llm_timeout_seconds value in DB: "
                f"{settings_dict['llm_timeout_seconds']}"
            )

    if 'ollama_api_timeout_seconds' in settings_dict and os.getenv("OLLAMA_API_TIMEOUT_SECONDS") is None:
        try:
            OLLAMA_API_TIMEOUT_SECONDS = int(settings_dict['ollama_api_timeout_seconds'])
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid ollama_api_timeout_seconds value in DB: "
                f"{settings_dict['ollama_api_timeout_seconds']}"
            )

    if 'article_fetch_timeout' in settings_dict and os.getenv("ARTICLE_FETCH_TIMEOUT") is None:
        try:
            ARTICLE_FETCH_TIMEOUT = int(settings_dict['article_fetch_timeout'])
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid article_fetch_timeout value in DB: "
                f"{settings_dict['article_fetch_timeout']}"
            )

    if 'max_article_length' in settings_dict and os.getenv("MAX_ARTICLE_LENGTH") is None:
        try:
            MAX_ARTICLE_LENGTH = int(settings_dict['max_article_length'])
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid max_article_length value in DB: "
                f"{settings_dict['max_article_length']}"
            )

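# Illustrative precedence example (not executed at import time; the values are
# made up): with OLLAMA_HOST set in the environment, the stored ollama_host row
# is ignored, while an unset CRON_HOURS is taken from the database and clamped
# to MIN_CRON_HOURS:
#
#   os.environ["OLLAMA_HOST"] = "http://gpu-box:11434"
#   update_constants_from_db({"ollama_host": "http://db-host:11434", "cron_hours": "2"})
#   # OLLAMA_HOST stays "http://gpu-box:11434"; CRON_HOURS becomes 2.0
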
logging.basicConfig(
    level=logging.WARNING,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="Owly News Summariser",
    description="API for the Owly News Summariser application",
    version="1.0.0"
)


class DatabaseManager:
    """
    Manages database connections and operations for the application.
    Provides methods for initializing the database, executing queries,
    and managing transactions.
    """

    def __init__(self, db_path: Path):
        """
        Initialize the database manager with the given database path.

        Args:
            db_path: Path to the SQLite database file
        """
        self.db_path = db_path
        self._initialize_db()

    def _initialize_db(self) -> None:
        """
        Initialize the database by creating tables if they don't exist.
        Also seeds initial feeds from seed_feeds.json and settings from global constants.
        After initialization, updates global constants with values from the database.
        """
        try:
            schema_file = Path(__file__).parent / "schema.sql"
            if not schema_file.exists():
                logger.error("❌ schema.sql not found")
                raise FileNotFoundError("schema.sql not found")

            with open(schema_file, 'r', encoding='utf-8') as f:
                schema_sql = f.read()

            with self.get_cursor() as cursor:
                statements = [stmt.strip() for stmt in schema_sql.split(';') if stmt.strip()]
                for statement in statements:
                    cursor.execute(statement)

                cursor.execute("SELECT COUNT(*) FROM feeds")
                feed_count = cursor.fetchone()[0]

                if feed_count == 0:
                    self._seed_feeds(cursor)

                cursor.execute("SELECT COUNT(*) FROM settings")
                settings_count = cursor.fetchone()[0]

                if settings_count == 0:
                    self._seed_settings(cursor)

            settings = self.get_all_settings()
            update_constants_from_db(settings)
        except Exception as e:
            logger.error(f"❌ Failed to initialize database: {e}")
            raise

    def get_all_settings(self) -> dict:
        """
        Retrieve all settings from the database.

        Returns:
            Dictionary of settings with key-value pairs
        """
        settings = {}
        try:
            with self.get_cursor(readonly=True) as cursor:
                cursor.execute("SELECT key, val FROM settings")
                for row in cursor.fetchall():
                    settings[row['key']] = row['val']
            return settings
        except Exception as e:
            logger.error(f"❌ Failed to retrieve settings from database: {e}")
            return {}

    def _seed_feeds(self, cursor: sqlite3.Cursor) -> None:
        """
        Seed initial feeds from the seed_feeds.json file.
        """
        try:
            seed_file = Path(__file__).parent / "seed_feeds.json"

            if not seed_file.exists():
                logger.warning("⚠️ seed_feeds.json not found, skipping feed seeding")
                return

            with open(seed_file, 'r', encoding='utf-8') as f:
                feeds_data = json.load(f)

            for country, urls in feeds_data.items():
                for url in urls:
                    cursor.execute(
                        "INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)",
                        (country, url)
                    )

        except Exception as e:
            logger.error(f"❌ Failed to seed feeds: {e}")

    def _seed_settings(self, cursor: sqlite3.Cursor) -> None:
        """
        Seed initial settings from global constants.
        """
        try:
            settings_data = {
                'ollama_host': OLLAMA_HOST,
                'min_cron_hours': MIN_CRON_HOURS,
                'cron_hours': CRON_HOURS,
                'sync_cooldown_minutes': SYNC_COOLDOWN_MINUTES,
                'llm_model': LLM_MODEL,
                'llm_timeout_seconds': LLM_TIMEOUT_SECONDS,
                'ollama_api_timeout_seconds': OLLAMA_API_TIMEOUT_SECONDS,
                'article_fetch_timeout': ARTICLE_FETCH_TIMEOUT,
                'max_article_length': MAX_ARTICLE_LENGTH
            }

            for key, val in settings_data.items():
                cursor.execute(
                    "INSERT OR IGNORE INTO settings (key, val) VALUES (?, ?)",
                    (key, str(val))
                )

        except Exception as e:
            logger.error(f"❌ Failed to seed settings: {e}")

    def _get_connection(self) -> sqlite3.Connection:
        """
        Create a thread-safe database connection.

        Returns:
            An active SQLite connection
        """
        conn = sqlite3.connect(
            self.db_path,
            check_same_thread=False,
            timeout=30.0
        )
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=30000")
        conn.execute("PRAGMA synchronous=NORMAL")
        conn.execute("PRAGMA temp_store=MEMORY")
        return conn

    @contextmanager
    def get_cursor(self, readonly: bool = False) -> Iterator[sqlite3.Cursor]:
        """
        Context manager that provides a database cursor and handles commits and rollbacks.

        Args:
            readonly: If True, runs in a deferred read transaction for better concurrency

        Yields:
            A database cursor for executing SQL statements
        """
        conn = None
        try:
            conn = self._get_connection()

            if readonly:
                conn.execute("BEGIN DEFERRED")

            cursor = conn.cursor()
            yield cursor

            if not readonly:
                conn.commit()
        except sqlite3.OperationalError as e:
            if conn:
                conn.rollback()
            if "database is locked" in str(e).lower():
                logger.warning(
                    f"⚠️ Database temporarily locked, operation may need retry: {e}"
                )
            raise e
        except Exception as e:
            if conn:
                conn.rollback()
            raise e
        finally:
            if conn:
                conn.close()

    @contextmanager
    def get_cursor_with_retry(self, readonly: bool = False, max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
        """
        Context manager with retry logic for database operations.

        Args:
            readonly: If True, opens connection in readonly mode
            max_retries: Maximum number of retry attempts

        Yields:
            A database cursor for executing SQL statements
        """
        for attempt in range(max_retries + 1):
            try:
                with self.get_cursor(readonly=readonly) as cursor:
                    yield cursor
                    return
            except sqlite3.OperationalError as e:
                if "database is locked" in str(e).lower() and attempt < max_retries:
                    wait_time = (attempt + 1) * 0.1
                    logger.warning(
                        f"⚠️ Database locked, retrying in {wait_time}s "
                        f"(attempt {attempt + 1}/{max_retries + 1})"
                    )
                    time.sleep(wait_time)
                    continue
                raise e


db_manager = DatabaseManager(DB_PATH)

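# Usage sketch for the cursor helpers above (illustrative; not executed at
# import time). Both context managers yield a sqlite3.Cursor and handle
# commit/rollback; the *_with_retry variant also retries briefly when SQLite
# reports "database is locked":
#
#   with db_manager.get_cursor_with_retry(readonly=True) as cur:
#       cur.execute("SELECT COUNT(*) FROM news")
#       total = cur.fetchone()[0]
#
#   with db_manager.get_cursor() as cur:  # read-write, committed on success
#       cur.execute("INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)",
#                   ("DE", "https://example.com/rss"))
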
class ArticleSummary(TypedDict):
    """Type definition for article summary data returned from the LLM."""
    title: str
    description: str


class NewsFetcher:
    """
    Handles fetching and summarizing news articles from RSS feeds.
    Uses Ollama/qwen to generate summaries of articles.
    """

    @staticmethod
    async def fetch_article_content(client: httpx.AsyncClient, url: str) -> str:
        """
        Fetch and extract the main content from an article URL.

        Args:
            client: An active httpx AsyncClient for making requests
            url: URL of the article to fetch

        Returns:
            Extracted text content from the article, or an empty string if fetching failed
        """
        try:
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/91.0.4472.124 Safari/537.36'
            }

            response = await client.get(
                url,
                headers=headers,
                timeout=ARTICLE_FETCH_TIMEOUT,
                follow_redirects=True
            )

            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'form', 'button']):
                element.decompose()

            content_selectors = [
                'article',
                '[role="main"]',
                '.content',
                '.article-content',
                '.post-content',
                '.entry-content',
                '.main-content',
                'main',
                '.story-body',
                '.article-body'
            ]

            article_text = ""

            for selector in content_selectors:
                elements = soup.select(selector)
                if elements:
                    for element in elements:
                        text = element.get_text(separator=' ', strip=True)
                        if len(text) > len(article_text):
                            article_text = text
                    break

            # Fallback: get text from body if no specific content area found
            if not article_text:
                body = soup.find('body')
                if body:
                    article_text = body.get_text(separator=' ', strip=True)

            article_text = re.sub(r'\s+', ' ', article_text)  # Normalize whitespace
            article_text = article_text.strip()

            # Limit length to avoid overwhelming the LLM
            if len(article_text) > MAX_ARTICLE_LENGTH:
                article_text = article_text[:MAX_ARTICLE_LENGTH] + "..."

            return article_text

        except httpx.TimeoutException:
            logger.warning(f"⏰ Timeout fetching article content from: {url}")
            return ""
        except httpx.HTTPError as e:
            logger.warning(f"🌐 HTTP error fetching article content from {url}: {e}")
            return ""
        except Exception as e:
            logger.warning(f"❌ Error fetching article content from {url}: {type(e).__name__}: {e}")
            return ""

    @staticmethod
    def build_prompt(url: str, title: str = "", description: str = "", content: str = "") -> str:
        """
        Generate a prompt for the LLM to summarize an article.

        Args:
            url: Public URL of the article to summarize
            title: Article title from RSS feed (optional)
            description: Article description from RSS feed (optional)
            content: Extracted article content (optional)

        Returns:
            A formatted prompt string that instructs the LLM to return
            minified JSON with a German title and description
        """
        context_info = []
        if title:
            context_info.append(f"RSS-Titel: {title}")
        if description:
            context_info.append(f"RSS-Beschreibung: {description}")
        if content:
            content_preview = content[:500] + "..." if len(content) > 500 else content
            context_info.append(f"Artikel-Inhalt: {content_preview}")

        context = "\n".join(context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."

        return (
            "### Aufgabe\n"
            f"Du sollst eine Nachricht basierend auf der URL und den verfügbaren Informationen zusammenfassen.\n"
            f"URL: {url}\n"
            f"Verfügbare Informationen:\n{context}\n\n"
            "### Regeln\n"
            "1. Nutze VORRANGIG den Artikel-Inhalt falls verfügbar, ergänze mit RSS-Informationen\n"
            "2. Falls kein Artikel-Inhalt verfügbar ist, nutze RSS-Titel und -Beschreibung\n"
            "3. Falls keine ausreichenden Informationen vorliegen, erstelle eine plausible Zusammenfassung basierend auf der URL\n"
            "4. Gib ausschließlich **gültiges minifiziertes JSON** zurück – kein Markdown, keine Kommentare\n"
            "5. Struktur: {\"title\":\"…\",\"description\":\"…\"}\n"
            "6. title: Aussagekräftiger deutscher Titel (max 100 Zeichen)\n"
            "7. description: Deutsche Zusammenfassung (zwischen 100 und 160 Wörter)\n"
            "8. Kein Text vor oder nach dem JSON\n\n"
            "### Ausgabe\n"
            "Jetzt antworte mit dem JSON:"
        )

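    # Expected shape of the model's reply to the prompt above (illustrative;
    # the actual wording depends on the article and on LLM_MODEL):
    #
    #   {"title":"Kurzer deutscher Titel","description":"Deutsche Zusammenfassung …"}
    #
    # summarize_article() below parses this JSON and checks both keys before
    # an article is stored.
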
    @staticmethod
    async def summarize_article(
            client: httpx.AsyncClient,
            url: str,
            title: str = "",
            description: str = ""
    ) -> Optional[ArticleSummary]:
        """
        Generate a summary of an article using the LLM.
        Fetches the actual article content for more accurate summaries.

        Args:
            client: An active httpx AsyncClient for making requests
            url: URL of the article to summarize
            title: Article title from RSS feed
            description: Article description from RSS feed

        Returns:
            A dictionary containing the German article title and description,
            or None if summarization failed
        """
        article_content = await NewsFetcher.fetch_article_content(client, url)

        if not article_content:
            logger.warning(f"⚠️ Could not fetch article content for {url}, using RSS data only")

        prompt = NewsFetcher.build_prompt(url, title, description, article_content)
        payload = {
            "model": LLM_MODEL,
            "prompt": prompt,
            "stream": False,
            "temperature": 0.1,
            "format": "json"
        }

        try:
            response = await client.post(
                f"{OLLAMA_HOST}/api/generate",
                json=payload,
                timeout=LLM_TIMEOUT_SECONDS
            )

            response.raise_for_status()
            result = response.json()
            llm_response = result["response"]

            if isinstance(llm_response, str):
                summary_data = json.loads(llm_response)
            else:
                summary_data = llm_response

            # Validate required fields
            required_fields = ["title", "description"]
            missing_fields = [field for field in required_fields if field not in summary_data]

            if missing_fields:
                logger.warning(
                    f"⚠️ Missing required fields in summary: {missing_fields}"
                )
                return None

            # Check summary quality metrics
            description_word_count = len(summary_data.get("description", "").split())

            if description_word_count > 160 or description_word_count < 100:
                logger.warning(
                    f"⚠️ Summary outside the 100-160 word range - "
                    f"Description: {description_word_count} words"
                )

            return cast(ArticleSummary, summary_data)

        except json.JSONDecodeError as e:
            logger.error(f"❌ JSON parsing error for {url}: {e}")
            logger.error(
                f"🔍 Raw response that failed to parse: {llm_response[:500]}..."
            )
            return None
        except httpx.HTTPError as e:
            logger.error(f"❌ HTTP error for {url}: {e}")
            return None
        except Exception as e:
            logger.error(f"❌ Unexpected error summarizing {url}: {type(e).__name__}: {e}")
            return None

    @staticmethod
    async def harvest_feeds() -> None:
        """
        Fetch articles from all feeds and store summaries in the database.
        This is the main function that runs periodically to update the news database.
        """
        total_articles = 0
        successful_articles = 0
        failed_articles = 0

        try:
            with db_manager.get_cursor() as cursor:
                cursor.execute("SELECT country, url FROM feeds")
                feeds = cursor.fetchall()

            async with httpx.AsyncClient() as client:
                for feed_row in feeds:
                    feed_stats = await NewsFetcher._process_feed(client, feed_row)

                    total_articles += feed_stats['total']
                    successful_articles += feed_stats['successful']
                    failed_articles += feed_stats['failed']

            logger.info(
                f"✅ Harvest finished: {successful_articles}/{total_articles} articles stored, "
                f"{failed_articles} failed"
            )

            current_time = int(datetime.now(timezone.utc).timestamp())
            with db_manager.get_cursor() as cursor:
                cursor.execute(
                    "UPDATE meta SET val=? WHERE key='last_sync'",
                    (str(current_time),)
                )

        except Exception as e:
            logger.error(f"❌ Critical error during harvest: {type(e).__name__}: {e}")
            raise

    @staticmethod
    async def _process_feed(
            client: httpx.AsyncClient,
            feed_row: sqlite3.Row
    ) -> Dict[str, int]:
        """
        Process a single feed, fetching and summarizing all articles.
        Summaries are saved to the database immediately to keep concurrency high.

        Args:
            client: An active httpx AsyncClient for making requests
            feed_row: A database row containing feed information

        Returns:
            Dictionary with processing statistics
        """
        stats = {'total': 0, 'successful': 0, 'failed': 0, 'skipped': 0}

        try:
            feed_data = feedparser.parse(feed_row["url"])

            if hasattr(feed_data, 'bozo') and feed_data.bozo:
                logger.warning(f"⚠️ Feed has parsing issues: {feed_row['url']}")
                if hasattr(feed_data, 'bozo_exception'):
                    logger.warning(f"⚠️ Feed exception: {feed_data.bozo_exception}")

            total_entries = len(feed_data.entries)

            if total_entries == 0:
                logger.warning(f"⚠️ No entries found in feed: {feed_row['url']}")
                return stats

            for i, entry in enumerate(feed_data.entries, 1):
                stats['total'] += 1

                if not hasattr(entry, "link"):
                    stats['skipped'] += 1
                    continue

                if not hasattr(entry, "published_parsed"):
                    stats['skipped'] += 1
                    continue

                article_url = entry.link

                try:
                    published = datetime(
                        *entry.published_parsed[:6],
                        tzinfo=timezone.utc
                    )
                except (TypeError, ValueError):
                    stats['skipped'] += 1
                    continue

                # Check if the article already exists - use a readonly connection for better concurrency
                try:
                    with db_manager.get_cursor_with_retry(readonly=True) as cursor:
                        cursor.execute("SELECT id FROM news WHERE url = ?", (article_url,))
                        if cursor.fetchone():
                            stats['skipped'] += 1
                            continue
                except Exception as db_error:
                    logger.warning(f"⚠️ Database check failed for article {i}, continuing: {db_error}")

                rss_title = getattr(entry, 'title', '')
                rss_description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')

                summary = await NewsFetcher.summarize_article(
                    client,
                    article_url,
                    title=rss_title,
                    description=rss_description
                )

                if not summary:
                    logger.warning(f"❌ Failed to get summary for article {i}: {article_url}")
                    stats['failed'] += 1
                    continue

                published_timestamp = int(published.timestamp())

                try:
                    with db_manager.get_cursor_with_retry(readonly=False) as cursor:
                        cursor.execute(
                            """
                            INSERT OR IGNORE INTO news
                                (title, description, url, published, country)
                            VALUES (?, ?, ?, ?, ?)
                            """,
                            (
                                summary["title"],
                                summary["description"],
                                article_url,
                                published_timestamp,
                                feed_row["country"],
                            )
                        )

                    stats['successful'] += 1

                except Exception as db_error:
                    logger.error(f"❌ Database error for article {i}: {db_error}")
                    stats['failed'] += 1
                    continue

                await asyncio.sleep(0.01)  # 10 ms delay to yield control

        except Exception as e:
            logger.error(f"❌ Error processing feed {feed_row['url']}: {type(e).__name__}: {e}")

        return stats


scheduler = AsyncIOScheduler(timezone="UTC")
scheduler.add_job(
    NewsFetcher.harvest_feeds,
    "interval",
    hours=CRON_HOURS,
    id="harvest"
)
scheduler.start()


# Pydantic models for API requests and responses
class CronSettings(BaseModel):
    """Settings for the cron job that harvests news."""
    hours: float


class FeedData(BaseModel):
    """Data for a news feed."""
    country: str
    url: str


class ModelStatus(BaseModel):
    """Status of the LLM model."""
    name: str
    status: str
    available_models: List[str]


class ErrorResponse(BaseModel):
    """Standard error response."""
    status: str
    message: str


class SuccessResponse(BaseModel):
    """Standard success response."""
    status: str


class TimestampResponse(BaseModel):
    """Response containing a timestamp."""
    ts: int


class HoursResponse(BaseModel):
    """Response containing the hours setting."""
    hours: float


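# Example request/response bodies for the models above (illustrative values):
#
#   POST /feeds          {"country": "DE", "url": "https://example.com/rss"}
#   POST /settings/cron  {"hours": 2}      -> {"hours": 2.0}
#   GET  /meta/last-sync                   -> {"ts": 1720000000}
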
async def get_db():
    """
    Dependency that provides a read-only database cursor with retry logic.

    Yields:
        A database cursor for executing SQL statements
    """
    with db_manager.get_cursor_with_retry(readonly=True) as cursor:
        yield cursor


async def get_db_write():
    """
    Dependency that provides a database cursor for write operations with retry logic.

    Yields:
        A database cursor for executing SQL statements
    """
    with db_manager.get_cursor_with_retry(readonly=False) as cursor:
        yield cursor


# API endpoints
@app.get("/news", response_model=List[Dict[str, Any]])
async def get_news(
        country: str = "DE",
        from_: str = "2025-07-01",
        to_: Optional[str] = None,
        db: sqlite3.Cursor = Depends(get_db)
):
    """
    Get news articles filtered by country and date range.
    Optimized for concurrent access while the scheduler is running.

    Args:
        country: Country code to filter by (default: "DE")
        from_: Start date in ISO format (default: "2025-07-01")
        to_: End date in ISO format (default: current date)
        db: Database cursor dependency

    Returns:
        List of news articles matching the criteria
    """
    if to_ is None:
        to_ = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    try:
        from_ts = int(datetime.fromisoformat(from_).timestamp())
        to_ts = int(datetime.fromisoformat(to_).timestamp())

        db.execute(
            """
            SELECT id, title, description, url, published, country, created_at
            FROM news
            WHERE country = ? AND published BETWEEN ? AND ?
            ORDER BY published DESC
            LIMIT 1000
            """,
            (country, from_ts, to_ts)
        )

        return [dict(row) for row in db.fetchall()]

    except ValueError:
        raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
    except Exception as e:
        logger.error(f"❌ Error fetching news: {e}")
        raise HTTPException(
            500, "Internal server error while fetching news"
        )


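# Example request (assuming the default uvicorn address of 127.0.0.1:8000):
#
#   curl "http://127.0.0.1:8000/news?country=DE&from_=2025-07-01&to_=2025-07-31"
#
# returns a JSON array of rows from the news table, newest first.
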
@app.get("/feeds", response_model=List[Dict[str, Any]])
async def list_feeds(db: sqlite3.Cursor = Depends(get_db)):
    """
    List all registered news feeds.

    Args:
        db: Database cursor dependency

    Returns:
        List of feed objects with id, country, and url
    """
    try:
        db.execute("SELECT * FROM feeds ORDER BY country, url")
        return [dict(row) for row in db.fetchall()]
    except Exception as e:
        logger.error(f"❌ Error fetching feeds: {e}")
        raise HTTPException(
            500, "Internal server error while fetching feeds"
        )


@app.get("/meta/last-sync", response_model=TimestampResponse)
async def get_last_sync(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the timestamp of the last successful feed synchronization.

    Args:
        db: Database cursor dependency

    Returns:
        Object containing the timestamp as a Unix epoch
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()
    if row is None:
        return {"ts": int(time.time())}
    return {"ts": int(row["val"])}


@app.post("/feeds", response_model=SuccessResponse)
async def add_feed(
        feed: FeedData,
        db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Add a new news feed.

    Args:
        feed: Feed data with country and URL
        db: Database cursor dependency

    Returns:
        Success status
    """
    try:
        db.execute(
            "INSERT INTO feeds (country, url) VALUES (?, ?) "
            "ON CONFLICT (url) DO NOTHING",
            (feed.country, feed.url)
        )
        return {"status": "added"}
    except Exception as e:
        logger.error(f"❌ Error adding feed: {e}")
        raise HTTPException(
            500, "Internal server error while adding feed"
        )


@app.delete("/feeds", response_model=SuccessResponse)
async def delete_feed(
        url: str,
        db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Delete a news feed by URL.

    Args:
        url: URL of the feed to delete
        db: Database cursor dependency

    Returns:
        Success status
    """
    try:
        db.execute("DELETE FROM feeds WHERE url=?", (url,))
        return {"status": "deleted"}
    except Exception as e:
        logger.error(f"❌ Error deleting feed: {e}")
        raise HTTPException(
            500, "Internal server error while deleting feed"
        )


@app.get("/model/status", response_model=Union[ModelStatus, ErrorResponse])
async def get_model_status():
    """
    Check the status of the LLM model.

    Returns:
        Object containing model name, status, and available models,
        or an error response if the model service is unavailable
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{OLLAMA_HOST}/api/tags",
                timeout=OLLAMA_API_TIMEOUT_SECONDS
            )
            response.raise_for_status()

            models_data = response.json()
            models = models_data.get("models", [])

            model_available = any(
                model.get("name") == LLM_MODEL for model in models
            )

            return {
                "name": LLM_MODEL,
                "status": "ready" if model_available else "not available",
                "available_models": [model.get("name") for model in models]
            }
    except Exception as e:
        return {"status": "error", "message": str(e)}


@app.post("/sync", response_model=None)
async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
    """
    Manually trigger a feed synchronization.

    Args:
        db: Database cursor dependency

    Returns:
        Success status, or an error response if sync was triggered too recently
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()
    last_sync_ts = int(row["val"]) if row is not None else 0

    now = datetime.now(timezone.utc)
    last_sync_time = datetime.fromtimestamp(last_sync_ts, timezone.utc)

    if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
        return Response(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            content="Sync was triggered too recently. Please wait before triggering again."
        )

    try:
        task = asyncio.create_task(NewsFetcher.harvest_feeds())
        return {"status": "triggered", "task_id": id(task)}
    except Exception as e:
        raise HTTPException(
            500, f"Failed to trigger sync: {str(e)}"
        )


@app.get("/settings/cron", response_model=HoursResponse)
async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the current cron schedule for harvesting news.

    Args:
        db: Database cursor dependency

    Returns:
        Object containing the current hours setting
    """
    db.execute("SELECT val FROM settings WHERE key='cron_hours'")
    row = db.fetchone()

    if row is None:
        return {"hours": CRON_HOURS}

    try:
        hours = float(row["val"])
        return {"hours": hours}
    except (ValueError, TypeError):
        return {"hours": CRON_HOURS}


@app.post("/settings/cron", response_model=HoursResponse)
async def update_cron_schedule(data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)):
    """
    Update the cron schedule for harvesting news.

    Args:
        data: New cron settings with hours interval
        db: Database cursor dependency

    Returns:
        Object containing the updated hours setting
    """
    global CRON_HOURS

    hours = max(MIN_CRON_HOURS, data.hours)

    scheduler.get_job("harvest").reschedule(trigger=IntervalTrigger(hours=hours))

    if os.getenv("CRON_HOURS") is None:
        db.execute(
            "UPDATE settings SET val=? WHERE key='cron_hours'",
            (str(hours),)
        )

    CRON_HOURS = hours

    return {"hours": hours}


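# Example requests for the endpoints above (illustrative; assumes the API is
# served on 127.0.0.1:8000):
#
#   curl -X POST http://127.0.0.1:8000/sync
#   curl -X POST http://127.0.0.1:8000/settings/cron \
#        -H "Content-Type: application/json" -d '{"hours": 2}'
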
# Mount static frontend
frontend_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
    "frontend",
    "dist"
)
app.mount("/", StaticFiles(directory=frontend_path, html=True), name="static")
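

# Local development entry point - a minimal sketch, assuming uvicorn is
# installed. The import string used on the command line depends on the actual
# package layout, e.g.:
#
#   uvicorn backend.main:app --reload
#
if __name__ == "__main__":  # pragma: no cover
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)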