""" Owly News Summariser Backend This module provides a FastAPI application that serves as the backend for the Owly News Summariser. It handles fetching news from RSS feeds, summarizing articles using Ollama/qwen, and providing an API for the frontend to access the summarized news. The application uses SQLite for data storage and APScheduler for scheduling periodic news harvesting. """ # Standard library imports import asyncio import os import sqlite3 import time from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Union import subprocess import threading # Third-party imports import httpx from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.triggers.interval import IntervalTrigger from fastapi import Depends, FastAPI, HTTPException, Response, status from fastapi.staticfiles import StaticFiles from starlette.responses import JSONResponse from backend.app.config import ( CRON_HOURS, LLM_MODEL, MIN_CRON_HOURS, OLLAMA_API_TIMEOUT_SECONDS, OLLAMA_HOST, SYNC_COOLDOWN_MINUTES, frontend_path, logger, ) from backend.app.database import get_db, get_db_write from backend.app.models import ( CronSettings, ErrorResponse, FeedData, HoursResponse, ModelStatus, SuccessResponse, TimestampResponse, ) from backend.app.services import NewsFetcher app = FastAPI( title="Owly News Summariser", description="API for the Owly News Summariser application", version="1.0.0" ) scheduler = AsyncIOScheduler(timezone="UTC") scheduler.add_job( NewsFetcher.harvest_feeds, "interval", hours=CRON_HOURS, id="harvest" ) scheduler.start() def start_frontend_build(): try: subprocess.Popen( ["yarn", "build"], cwd="../frontend", stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) print("Frontend build started successfully") except Exception as e: print(f"Failed to start frontend build: {e}") # API endpoints @app.get("/news", response_model=List[Dict[str, Any]]) async def get_news( country: str = "DE", from_: str = None, to_: str = None, timezone_name: str = "UTC", all_countries: bool = False, all_dates: bool = False, db: sqlite3.Cursor = Depends(get_db) ): """ Get news articles filtered by country and date range. Now handles client timezone properly and supports multiple countries and all news. Args: country: Country code(s) to filter by (default: "DE"). For multiple countries, use comma-separated values like "DE,US,GB" from_: Start date in ISO format (optional) to_: End date in ISO format (optional) timezone_name: Client timezone for date interpretation (default: "UTC") all_countries: If True, returns news from all countries (overrides country param) all_dates: If True, returns news from all dates (overrides date params) db: Database cursor dependency Returns: List of news articles matching the criteria """ try: from zoneinfo import ZoneInfo # Handle timezone try: client_tz = ZoneInfo(timezone_name) except Exception: logger.warning(f"Invalid timezone '{timezone_name}', using UTC") client_tz = timezone.utc where_conditions = [] params = [] if not all_countries: countries = [c.strip().upper() for c in country.split(',') if c.strip()] if len(countries) == 1: where_conditions.append("country = ?") params.append(countries[0]) elif len(countries) > 1: placeholders = ','.join(['?' for _ in countries]) where_conditions.append(f"country IN ({placeholders})") params.extend(countries) if not all_dates and (from_ or to_): if not from_: from_ = "2025-01-01" # Default start date if not to_: to_ = datetime.now(timezone.utc).strftime("%Y-%m-%d") # Default to today # Parse and convert dates from_date_naive = datetime.fromisoformat(from_) to_date_naive = datetime.fromisoformat(to_) from_date = from_date_naive.replace(tzinfo=client_tz) to_date = to_date_naive.replace(tzinfo=client_tz) # Include the entire end date to_date = to_date.replace(hour=23, minute=59, second=59) from_ts = int(from_date.timestamp()) to_ts = int(to_date.timestamp()) where_conditions.append("published BETWEEN ? AND ?") params.extend([from_ts, to_ts]) logger.info(f"Date range: {from_date} to {to_date} (UTC timestamps: {from_ts} to {to_ts})") # Build the complete SQL query base_sql = """ SELECT id, title, summary, url, published, country, created_at FROM news \ """ if where_conditions: sql = base_sql + " WHERE " + " AND ".join(where_conditions) else: sql = base_sql sql += " ORDER BY published DESC LIMIT 1000" # Log query info if all_countries and all_dates: logger.info("Querying ALL news articles (no filters)") elif all_countries: logger.info(f"Querying news from ALL countries with date filter") elif all_dates: logger.info(f"Querying ALL dates for countries: {country}") else: logger.info(f"Querying news: countries={country}, timezone={timezone_name}") logger.info(f"SQL: {sql}") logger.info(f"Parameters: {params}") # Execute the query db.execute(sql, params) rows = db.fetchall() result = [dict(row) for row in rows] logger.info(f"Found {len(result)} news articles") return result except ValueError as e: logger.error(f"Date parsing error: {e}") raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)") except Exception as e: logger.error(f"❌ Error fetching news: {e}") raise HTTPException(500, "Internal server error while fetching news") @app.get("/feeds", response_model=List[Dict[str, Any]]) async def list_feeds(db: sqlite3.Cursor = Depends(get_db)): """ List all registered news feeds. Args: db: Database cursor dependency Returns: List of feed objects with id, country, and url """ try: db.execute("SELECT * FROM feeds ORDER BY country, url") return JSONResponse(content=[dict(row) for row in db.fetchall()]) except Exception as e: logger.error(f"❌ Error fetching feeds: {e}") raise HTTPException( 500, "Internal server error while fetching feeds" ) @app.post("/feeds", response_model=SuccessResponse) async def add_feed( feed: FeedData, db: sqlite3.Cursor = Depends(get_db_write) ): """ Add a new news feed. Args: feed: Feed data with country and URL db: Database cursor dependency Returns: Success status """ try: db.execute( "INSERT INTO feeds (country, url) VALUES (?, ?) " "ON CONFLICT (url) DO NOTHING", (feed.country, feed.url) ) return {"status": "added"} except Exception as e: logger.error(f"❌ Error adding feed: {e}") raise HTTPException( 500, "Internal server error while adding feed" ) @app.delete("/feeds", response_model=SuccessResponse) async def delete_feed( url: str, db: sqlite3.Cursor = Depends(get_db_write) ): """ Delete a news feed by URL. Args: url: URL of the feed to delete db: Database cursor dependency Returns: Success status """ try: db.execute("DELETE FROM feeds WHERE url=?", (url,)) return {"status": "deleted"} except Exception as e: logger.error(f"❌ Error deleting feed: {e}") raise HTTPException( 500, "Internal server error while deleting feed" ) @app.get("/model/status", response_model=Union[ModelStatus, ErrorResponse]) async def get_model_status(): """ Check the status of the LLM model. Returns: Object containing model name, status, and available models, or an error response if the model service is unavailable """ try: async with httpx.AsyncClient() as client: response = await client.get( f"{OLLAMA_HOST}/api/tags", timeout=OLLAMA_API_TIMEOUT_SECONDS ) response.raise_for_status() models_data = response.json() models = models_data.get("models", []) model_available = any( model.get("name") == LLM_MODEL for model in models ) return { "name": LLM_MODEL, "status": "ready" if model_available else "not available", "available_models": [model.get("name") for model in models] } except Exception as e: return {"status": "error", "message": str(e)} @app.post("/sync", response_model=None) async def manual_sync(db: sqlite3.Cursor = Depends(get_db_write)): # Note: changed to get_db_write """ Manually trigger a feed synchronization. Args: db: Database cursor dependency Returns: Success status or error response if sync was triggered too recently """ db.execute("SELECT val FROM meta WHERE key='last_sync'") row = db.fetchone() if row is None: # Initialize the last_sync key if it doesn't exist import time last_sync_ts = int(time.time()) - (SYNC_COOLDOWN_MINUTES * 60 + 1) # Set to a time that allows sync db.execute("INSERT INTO meta (key, val) VALUES ('last_sync', ?)", (str(last_sync_ts),)) else: last_sync_ts = int(row["val"]) now = datetime.now(timezone.utc) last_sync_time = datetime.fromtimestamp(last_sync_ts, timezone.utc) if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES): return Response( status_code=status.HTTP_429_TOO_MANY_REQUESTS, content="Sync was triggered too recently. Please wait before triggering again.") try: task = asyncio.create_task(NewsFetcher.harvest_feeds()) # Update the last_sync timestamp after triggering the sync current_ts = int(time.time()) db.execute("UPDATE meta SET val=? WHERE key='last_sync'", (str(current_ts),)) return {"status": "triggered", "task_id": id(task)} except Exception as e: logger.error(f"❌ Failed to trigger sync: {e}") raise HTTPException( 500, f"Failed to trigger sync: {str(e)}" ) @app.get("/meta/last-sync", response_model=TimestampResponse) async def get_last_sync(db: sqlite3.Cursor = Depends(get_db)): """ Get the timestamp of the last successful feed synchronization. Args: db: Database cursor dependency Returns: Object containing the timestamp as a Unix epoch """ db.execute("SELECT val FROM meta WHERE key='last_sync'") row = db.fetchone() if row is None: import time return {"ts": int(time.time())} return {"ts": int(row["val"])} @app.get("/settings/cron", response_model=HoursResponse) async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)): """ Get the current cron schedule for harvesting news. Args: db: Database cursor dependency Returns: Object containing the current hours setting """ db.execute("SELECT val FROM settings WHERE key='cron_hours'") row = db.fetchone() if row is None: return {"hours": CRON_HOURS} try: hours = float(row["val"]) return {"hours": hours} except (ValueError, TypeError): return {"hours": CRON_HOURS} @app.post("/settings/cron", response_model=HoursResponse) async def update_cron_schedule( data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)): """ Update the cron schedule for harvesting news. Args: data: New cron settings with hours interval db: Database cursor dependency Returns: Object containing the updated hours setting """ hours = max(MIN_CRON_HOURS, data.hours) scheduler.get_job("harvest").modify(trigger=IntervalTrigger(hours=hours)) if os.getenv("CRON_HOURS") is None: db.execute( "UPDATE settings SET val=? WHERE key='cron_hours'", (str(hours),) ) global CRON_HOURS CRON_HOURS = hours return {"hours": hours} app.mount("/", StaticFiles(directory=frontend_path, html=True), name="static") if __name__ == "__main__": threading.Thread(target=start_frontend_build, daemon=True).start() import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000)