# owly-news/backend/app/main.py
"""
Owly News Summariser Backend
This module provides a FastAPI application that serves as the backend for the Owly News Summariser.
It handles fetching news from RSS feeds, summarizing articles using Ollama/qwen, and providing
an API for the frontend to access the summarized news.
The application uses SQLite for data storage and APScheduler for scheduling periodic news harvesting.
"""
# Standard library imports
import asyncio
import os
import sqlite3
from datetime import datetime, timedelta, timezone
from http.client import HTTPException  # noqa: F401 -- kept for compatibility; shadowed by the FastAPI import below
from typing import Any, Dict, List, Union

# Third-party imports
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from fastapi import Depends, FastAPI, HTTPException, Response, status
from fastapi.staticfiles import StaticFiles

# Local imports
from backend.app.config import logger, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
    SYNC_COOLDOWN_MINUTES, LLM_MODEL, OLLAMA_API_TIMEOUT_SECONDS, frontend_path
from backend.app.database import get_db, get_db_write
from backend.app.models import TimestampResponse, SuccessResponse, FeedData, ModelStatus, ErrorResponse, HoursResponse, \
    CronSettings
from backend.app.services import NewsFetcher
# FastAPI application instance: serves the JSON API and (at the bottom of this
# module) the static frontend.
app = FastAPI(
    title="Owly News Summariser",
    description="API for the Owly News Summariser application",
    version="1.0.0"
)

# Background scheduler that periodically harvests all registered feeds.
# NOTE(review): the scheduler is started at import time; this assumes an event
# loop is running when jobs fire -- confirm against the ASGI server's startup
# sequence.
scheduler = AsyncIOScheduler(timezone="UTC")
scheduler.add_job(
    NewsFetcher.harvest_feeds,
    "interval",
    hours=CRON_HOURS,  # interval can be changed at runtime via POST /settings/cron
    id="harvest"       # job id used by update_cron_schedule to re-trigger
)
scheduler.start()
# API endpoints
@app.get("/news", response_model=List[Dict[str, Any]])
async def get_news(
    country: str = "DE",
    from_: str = "2025-07-01",
    to_: Union[str, None] = None,
    db: sqlite3.Cursor = Depends(get_db)
):
    """
    Get news articles filtered by country and date range.

    Optimized for concurrent access while the scheduler is running.

    Args:
        country: Country code to filter by (default: "DE")
        from_: Start date in ISO format (default: "2025-07-01")
        to_: End date in ISO format; defaults to the current UTC date,
             computed per request (a function default would be frozen at
             import time and grow stale while the server runs)
        db: Database cursor dependency

    Returns:
        List of news articles matching the criteria (capped at 1000 rows)

    Raises:
        HTTPException: 400 for malformed dates, 500 for database errors
    """
    if to_ is None:
        to_ = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    try:
        # Parse each bound exactly once (the previous version parsed twice).
        from_ts = int(datetime.fromisoformat(from_).timestamp())
        to_ts = int(datetime.fromisoformat(to_).timestamp())
    except ValueError:
        raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
    try:
        db.execute(
            """
            SELECT id, title, description, url, published, country, created_at
            FROM news
            WHERE country = ?
            AND published BETWEEN ? AND ?
            ORDER BY published DESC LIMIT 1000
            """,
            (country, from_ts, to_ts)
        )
        return [dict(row) for row in db.fetchall()]
    except Exception as e:
        logger.error(f"❌ Error fetching news: {e}")
        raise HTTPException(
            500, "Internal server error while fetching news"
        )
@app.get("/feeds", response_model=List[Dict[str, Any]])
async def list_feeds(db: sqlite3.Cursor = Depends(get_db)):
    """
    Return every registered news feed.

    Args:
        db: Database cursor dependency

    Returns:
        Feed records (id, country, url) ordered by country, then URL.

    Raises:
        HTTPException: 500 when the database query fails.
    """
    try:
        db.execute("SELECT * FROM feeds ORDER BY country, url")
        return [dict(record) for record in db.fetchall()]
    except Exception as exc:
        logger.error(f"❌ Error fetching feeds: {exc}")
        raise HTTPException(
            500, "Internal server error while fetching feeds"
        )
@app.get("/meta/last-sync", response_model=TimestampResponse)
async def get_last_sync(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the timestamp of the last successful feed synchronization.

    Args:
        db: Database cursor dependency

    Returns:
        Object with the last-sync time as a Unix epoch. Falls back to the
        current time when no sync has ever been recorded.
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()
    if row is None:
        # No sync recorded yet: report "now" rather than a dummy epoch.
        # (Replaces a function-local `import time`; `datetime` is already
        # imported at module level and yields the same integer epoch.)
        return {"ts": int(datetime.now(timezone.utc).timestamp())}
    return {"ts": int(row["val"])}
@app.post("/feeds", response_model=SuccessResponse)
async def add_feed(
    feed: FeedData,
    db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Register a new news feed. A feed whose URL is already registered is
    silently ignored (upsert-style no-op).

    Args:
        feed: Feed data with country and URL
        db: Writable database cursor dependency

    Returns:
        Success status

    Raises:
        HTTPException: 500 when the insert fails.
    """
    try:
        values = (feed.country, feed.url)
        db.execute(
            "INSERT INTO feeds (country, url) VALUES (?, ?) "
            "ON CONFLICT (url) DO NOTHING",
            values
        )
    except Exception as exc:
        logger.error(f"❌ Error adding feed: {exc}")
        raise HTTPException(
            500, "Internal server error while adding feed"
        )
    return {"status": "added"}
@app.delete("/feeds", response_model=SuccessResponse)
async def delete_feed(
    url: str,
    db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Remove a news feed identified by its URL.

    Deleting a URL that is not registered is not an error; the endpoint
    still reports success.

    Args:
        url: URL of the feed to delete
        db: Writable database cursor dependency

    Returns:
        Success status

    Raises:
        HTTPException: 500 when the delete statement fails.
    """
    try:
        db.execute("DELETE FROM feeds WHERE url=?", (url,))
    except Exception as exc:
        logger.error(f"❌ Error deleting feed: {exc}")
        raise HTTPException(
            500, "Internal server error while deleting feed"
        )
    return {"status": "deleted"}
@app.get("/model/status", response_model=Union[ModelStatus, ErrorResponse])
async def get_model_status():
    """
    Check whether the configured LLM model is available in Ollama.

    Returns:
        Model name, readiness status, and the list of models Ollama reports;
        or an error payload when the model service cannot be reached
        (deliberate best-effort: any failure is reported, never raised).
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{OLLAMA_HOST}/api/tags",
                timeout=OLLAMA_API_TIMEOUT_SECONDS
            )
            response.raise_for_status()
            installed = response.json().get("models", [])
        names = [entry.get("name") for entry in installed]
        return {
            "name": LLM_MODEL,
            "status": "ready" if LLM_MODEL in names else "not available",
            "available_models": names
        }
    except Exception as exc:
        return {"status": "error", "message": str(exc)}
@app.post("/sync", response_model=None)
async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
    """
    Manually trigger a feed synchronization.

    Enforces a cooldown of SYNC_COOLDOWN_MINUTES between manual syncs.

    Args:
        db: Database cursor dependency

    Returns:
        Trigger status, or a 429 response if a sync ran too recently.

    Raises:
        HTTPException: 500 when the background task cannot be scheduled.
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()
    # A missing meta row previously crashed with TypeError (`None["val"]`);
    # treat it as "never synced" so the first manual sync always goes through.
    if row is not None:
        last_sync_time = datetime.fromtimestamp(int(row["val"]), timezone.utc)
        if datetime.now(timezone.utc) - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
            return Response(
                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
                content="Sync was triggered too recently. Please wait before triggering again."
            )
    try:
        task = asyncio.create_task(NewsFetcher.harvest_feeds())
        # Keep a strong reference to the task: the event loop only holds a
        # weak reference, so an unreferenced task may be garbage-collected
        # before it finishes.
        if not hasattr(app.state, "sync_tasks"):
            app.state.sync_tasks = set()
        app.state.sync_tasks.add(task)
        task.add_done_callback(app.state.sync_tasks.discard)
        return {"status": "triggered", "task_id": id(task)}
    except Exception as e:
        raise HTTPException(
            500, f"Failed to trigger sync: {str(e)}"
        )
@app.get("/settings/cron", response_model=HoursResponse)
async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the current harvest interval in hours.

    Falls back to the configured CRON_HOURS when the setting row is missing
    or its value cannot be parsed as a number.

    Args:
        db: Database cursor dependency

    Returns:
        Object containing the current hours setting
    """
    db.execute("SELECT val FROM settings WHERE key='cron_hours'")
    record = db.fetchone()
    if record is not None:
        try:
            return {"hours": float(record["val"])}
        except (ValueError, TypeError):
            pass  # unparseable stored value -> fall back to the default
    return {"hours": CRON_HOURS}
@app.post("/settings/cron", response_model=HoursResponse)
async def update_cron_schedule(data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)):
    """
    Update the cron schedule for harvesting news and re-trigger the job.

    Args:
        data: New cron settings; the hours interval is clamped to at least
              MIN_CRON_HOURS
        db: Writable database cursor dependency

    Returns:
        Object containing the applied hours setting
    """
    global CRON_HOURS
    hours = max(MIN_CRON_HOURS, data.hours)
    job = scheduler.get_job("harvest")
    if job is not None:  # guard: job may have been removed externally
        job.modify(trigger=IntervalTrigger(hours=hours))
    # Persist only when the interval is not pinned by the environment.
    if os.getenv("CRON_HOURS") is None:
        # Upsert: a plain UPDATE silently did nothing when the 'cron_hours'
        # row did not exist yet, so the setting was never persisted.
        # NOTE(review): assumes settings.key is UNIQUE/PRIMARY KEY -- confirm
        # against the schema (the feeds table already relies on ON CONFLICT).
        db.execute(
            "INSERT INTO settings (key, val) VALUES ('cron_hours', ?) "
            "ON CONFLICT (key) DO UPDATE SET val=excluded.val",
            (str(hours),)
        )
    CRON_HOURS = hours
    return {"hours": hours}
# Mount the built frontend at the app root; html=True serves index.html as the
# default document. Mounted last so the API routes above take precedence.
app.mount("/", StaticFiles(directory=frontend_path, html=True), name="static")