# owly-news/backend/app/main.py
"""
Owly News Summariser Backend
This module provides a FastAPI application that serves as the backend for the Owly News Summariser.
It handles fetching news from RSS feeds, summarizing articles using Ollama/qwen, and providing
an API for the frontend to access the summarized news.
The application uses SQLite for data storage and APScheduler for scheduling periodic news harvesting.
"""
# Standard library imports
import asyncio
import os
import sqlite3
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional, Union
# Third-party imports
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from fastapi import Depends, FastAPI, HTTPException, Response, status
from fastapi.staticfiles import StaticFiles
from starlette.responses import JSONResponse
from backend.app.config import (
CRON_HOURS,
LLM_MODEL,
MIN_CRON_HOURS,
OLLAMA_API_TIMEOUT_SECONDS,
OLLAMA_HOST,
SYNC_COOLDOWN_MINUTES,
frontend_path,
logger,
)
from backend.app.database import get_db, get_db_write
from backend.app.models import (
CronSettings,
ErrorResponse,
FeedData,
HoursResponse,
ModelStatus,
SuccessResponse,
TimestampResponse,
)
from backend.app.services import NewsFetcher
app = FastAPI(
title="Owly News Summariser",
description="API for the Owly News Summariser application",
version="1.0.0"
)
scheduler = AsyncIOScheduler(timezone="UTC")
scheduler.add_job(
NewsFetcher.harvest_feeds,
"interval",
hours=CRON_HOURS,
id="harvest"
)
scheduler.start()
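# The scheduler is started at import time and runs for the lifetime of the process.
# A minimal sketch of a shutdown hook, assuming nothing else stops the scheduler:
@app.on_event("shutdown")
async def _stop_scheduler() -> None:
    """Stop the background scheduler when the application shuts down."""
    scheduler.shutdown(wait=False)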
# API endpoints
@app.get("/news", response_model=List[Dict[str, Any]])
async def get_news(
country: str = "DE",
    from_: Optional[str] = None,
    to_: Optional[str] = None,
timezone_name: str = "UTC",
all_countries: bool = False,
all_dates: bool = False,
db: sqlite3.Cursor = Depends(get_db)
):
"""
Get news articles filtered by country and date range.
Now handles client timezone properly and supports multiple countries and all news.
Args:
country: Country code(s) to filter by (default: "DE").
For multiple countries, use comma-separated values like "DE,US,GB"
from_: Start date in ISO format (optional)
to_: End date in ISO format (optional)
timezone_name: Client timezone for date interpretation (default: "UTC")
all_countries: If True, returns news from all countries (overrides country param)
all_dates: If True, returns news from all dates (overrides date params)
db: Database cursor dependency
Returns:
List of news articles matching the criteria
"""
try:
from zoneinfo import ZoneInfo
# Handle timezone
try:
client_tz = ZoneInfo(timezone_name)
except Exception:
logger.warning(f"Invalid timezone '{timezone_name}', using UTC")
client_tz = timezone.utc
where_conditions = []
params = []
if not all_countries:
countries = [c.strip().upper() for c in country.split(',') if c.strip()]
if len(countries) == 1:
where_conditions.append("country = ?")
params.append(countries[0])
elif len(countries) > 1:
placeholders = ','.join(['?' for _ in countries])
where_conditions.append(f"country IN ({placeholders})")
params.extend(countries)
if not all_dates and (from_ or to_):
if not from_:
from_ = "2025-01-01" # Default start date
if not to_:
to_ = datetime.now(timezone.utc).strftime("%Y-%m-%d") # Default to today
# Parse and convert dates
from_date_naive = datetime.fromisoformat(from_)
to_date_naive = datetime.fromisoformat(to_)
from_date = from_date_naive.replace(tzinfo=client_tz)
to_date = to_date_naive.replace(tzinfo=client_tz)
# Include the entire end date
to_date = to_date.replace(hour=23, minute=59, second=59)
from_ts = int(from_date.timestamp())
to_ts = int(to_date.timestamp())
where_conditions.append("published BETWEEN ? AND ?")
params.extend([from_ts, to_ts])
logger.info(f"Date range: {from_date} to {to_date} (UTC timestamps: {from_ts} to {to_ts})")
# Build the complete SQL query
base_sql = """
SELECT id, title, summary, url, published, country, created_at
FROM news \
"""
if where_conditions:
sql = base_sql + " WHERE " + " AND ".join(where_conditions)
else:
sql = base_sql
sql += " ORDER BY published DESC LIMIT 1000"
# Log query info
if all_countries and all_dates:
logger.info("Querying ALL news articles (no filters)")
elif all_countries:
logger.info(f"Querying news from ALL countries with date filter")
elif all_dates:
logger.info(f"Querying ALL dates for countries: {country}")
else:
logger.info(f"Querying news: countries={country}, timezone={timezone_name}")
logger.info(f"SQL: {sql}")
logger.info(f"Parameters: {params}")
# Execute the query
db.execute(sql, params)
rows = db.fetchall()
result = [dict(row) for row in rows]
logger.info(f"Found {len(result)} news articles")
return result
except ValueError as e:
logger.error(f"Date parsing error: {e}")
raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
except Exception as e:
logger.error(f"❌ Error fetching news: {e}")
raise HTTPException(500, "Internal server error while fetching news")
@app.get("/feeds", response_model=List[Dict[str, Any]])
async def list_feeds(db: sqlite3.Cursor = Depends(get_db)):
"""
List all registered news feeds.
Args:
db: Database cursor dependency
Returns:
List of feed objects with id, country, and url
"""
try:
db.execute("SELECT * FROM feeds ORDER BY country, url")
return JSONResponse(content=[dict(row) for row in db.fetchall()])
except Exception as e:
logger.error(f"❌ Error fetching feeds: {e}")
raise HTTPException(
500, "Internal server error while fetching feeds"
)
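# Example response row (hypothetical values): {"id": 1, "country": "DE", "url": "https://example.com/rss.xml"}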
@app.post("/feeds", response_model=SuccessResponse)
async def add_feed(
feed: FeedData,
db: sqlite3.Cursor = Depends(get_db_write)
):
"""
Add a new news feed.
Args:
feed: Feed data with country and URL
db: Database cursor dependency
Returns:
        Success status; duplicate URLs are ignored via ON CONFLICT DO NOTHING
"""
try:
db.execute(
"INSERT INTO feeds (country, url) VALUES (?, ?) "
"ON CONFLICT (url) DO NOTHING",
(feed.country, feed.url)
)
return {"status": "added"}
except Exception as e:
logger.error(f"❌ Error adding feed: {e}")
raise HTTPException(
500, "Internal server error while adding feed"
)
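# Example (hypothetical values): POST /feeds with body {"country": "DE", "url": "https://example.com/rss.xml"}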
@app.delete("/feeds", response_model=SuccessResponse)
async def delete_feed(
url: str,
db: sqlite3.Cursor = Depends(get_db_write)
):
"""
Delete a news feed by URL.
Args:
url: URL of the feed to delete
db: Database cursor dependency
Returns:
Success status
"""
try:
db.execute("DELETE FROM feeds WHERE url=?", (url,))
return {"status": "deleted"}
except Exception as e:
logger.error(f"❌ Error deleting feed: {e}")
raise HTTPException(
500, "Internal server error while deleting feed"
)
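# Example (hypothetical value): DELETE /feeds?url=https%3A%2F%2Fexample.com%2Frss.xml
# The feed URL is passed as a query parameter and should be URL-encoded.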
@app.get("/model/status", response_model=Union[ModelStatus, ErrorResponse])
async def get_model_status():
"""
Check the status of the LLM model.
Returns:
Object containing model name, status, and available models,
or an error response if the model service is unavailable
"""
try:
async with httpx.AsyncClient() as client:
response = await client.get(
f"{OLLAMA_HOST}/api/tags",
timeout=OLLAMA_API_TIMEOUT_SECONDS
)
response.raise_for_status()
models_data = response.json()
models = models_data.get("models", [])
model_available = any(
model.get("name") == LLM_MODEL for model in models
)
return {
"name": LLM_MODEL,
"status": "ready" if model_available else "not available",
"available_models": [model.get("name") for model in models]
}
except Exception as e:
return {"status": "error", "message": str(e)}
@app.post("/sync", response_model=None)
async def manual_sync(db: sqlite3.Cursor = Depends(get_db_write)):
"""
Manually trigger a feed synchronization.
Args:
db: Database cursor dependency
Returns:
Success status or error response if sync was triggered too recently
"""
db.execute("SELECT val FROM meta WHERE key='last_sync'")
row = db.fetchone()
if row is None:
# Initialize the last_sync key if it doesn't exist
        last_sync_ts = int(time.time()) - (SYNC_COOLDOWN_MINUTES * 60 + 1)  # Backdated so this first sync is allowed
db.execute("INSERT INTO meta (key, val) VALUES ('last_sync', ?)", (str(last_sync_ts),))
else:
last_sync_ts = int(row["val"])
now = datetime.now(timezone.utc)
last_sync_time = datetime.fromtimestamp(last_sync_ts, timezone.utc)
if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
return Response(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
content="Sync was triggered too recently. Please wait before triggering again.")
try:
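        # asyncio keeps only a weak reference to tasks created with create_task(), so a
        # long-running harvest can be garbage collected unless a reference is retained.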
task = asyncio.create_task(NewsFetcher.harvest_feeds())
# Update the last_sync timestamp after triggering the sync
current_ts = int(time.time())
db.execute("UPDATE meta SET val=? WHERE key='last_sync'", (str(current_ts),))
return {"status": "triggered", "task_id": id(task)}
except Exception as e:
logger.error(f"❌ Failed to trigger sync: {e}")
raise HTTPException(
500, f"Failed to trigger sync: {str(e)}"
)
@app.get("/meta/last-sync", response_model=TimestampResponse)
async def get_last_sync(db: sqlite3.Cursor = Depends(get_db)):
"""
Get the timestamp of the last successful feed synchronization.
Args:
db: Database cursor dependency
Returns:
Object containing the timestamp as a Unix epoch
"""
db.execute("SELECT val FROM meta WHERE key='last_sync'")
row = db.fetchone()
if row is None:
        return {"ts": int(time.time())}
return {"ts": int(row["val"])}
@app.get("/settings/cron", response_model=HoursResponse)
async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
"""
Get the current cron schedule for harvesting news.
Args:
db: Database cursor dependency
Returns:
Object containing the current hours setting
"""
db.execute("SELECT val FROM settings WHERE key='cron_hours'")
row = db.fetchone()
if row is None:
return {"hours": CRON_HOURS}
try:
hours = float(row["val"])
return {"hours": hours}
except (ValueError, TypeError):
return {"hours": CRON_HOURS}
@app.post("/settings/cron", response_model=HoursResponse)
async def update_cron_schedule(
data: CronSettings,
db: sqlite3.Cursor = Depends(get_db_write)):
"""
Update the cron schedule for harvesting news.
Args:
data: New cron settings with hours interval
db: Database cursor dependency
Returns:
Object containing the updated hours setting
"""
hours = max(MIN_CRON_HOURS, data.hours)
scheduler.get_job("harvest").modify(trigger=IntervalTrigger(hours=hours))
if os.getenv("CRON_HOURS") is None:
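        # Assumes a 'cron_hours' row already exists in settings; the UPDATE is a no-op otherwise.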
db.execute(
"UPDATE settings SET val=? WHERE key='cron_hours'",
(str(hours),)
)
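    # Rebinds only this module's CRON_HOURS; backend.app.config.CRON_HOURS itself is unchanged.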
global CRON_HOURS
CRON_HOURS = hours
return {"hours": hours}
# Mount static frontend
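# Registered after the API routes so the catch-all static mount does not shadow them.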
app.mount("/", StaticFiles(directory=frontend_path, html=True), name="static")