"""
Owly News Summariser Backend

This module provides a FastAPI application that serves as the backend for the Owly News Summariser.
It handles fetching news from RSS feeds, summarizing articles using Ollama/qwen, and providing
an API for the frontend to access the summarized news.

The application uses SQLite for data storage and APScheduler for scheduling periodic news harvesting.
"""
# Standard library imports
import asyncio
import os
import sqlite3
from datetime import datetime, timedelta, timezone
from http.client import HTTPException  # NOTE: intentionally shadowed by the fastapi import below
from typing import Any, Dict, List, Union

# Third-party imports
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
# HTTPException must come from fastapi so that `raise HTTPException(code, detail)`
# produces a proper HTTP error response; http.client.HTTPException does not.
from fastapi import Depends, FastAPI, HTTPException, Response, status
from fastapi.staticfiles import StaticFiles

# Local imports
from backend.app.config import logger, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
    SYNC_COOLDOWN_MINUTES, LLM_MODEL, OLLAMA_API_TIMEOUT_SECONDS, frontend_path
from backend.app.database import get_db, get_db_write
from backend.app.models import TimestampResponse, SuccessResponse, FeedData, ModelStatus, ErrorResponse, HoursResponse, \
    CronSettings
from backend.app.services import NewsFetcher
# Application instance served by uvicorn/ASGI.
app = FastAPI(
    title="Owly News Summariser",
    description="API for the Owly News Summariser application",
    version="1.0.0",
)

# Periodic harvesting: run NewsFetcher.harvest_feeds every CRON_HOURS hours.
# The job id "harvest" is looked up again by the /settings/cron endpoint,
# which rewrites the interval at runtime.
scheduler = AsyncIOScheduler(timezone="UTC")
scheduler.add_job(
    NewsFetcher.harvest_feeds,
    "interval",
    hours=CRON_HOURS,
    id="harvest",
)
# NOTE(review): starting the scheduler at import time works, but a FastAPI
# lifespan/startup hook would be the conventional place — confirm intent.
scheduler.start()
# API endpoints
@app.get("/news", response_model=List[Dict[str, Any]])
async def get_news(
    country: str = "DE",
    from_: str = "2025-07-01",
    to_: Union[str, None] = None,
    db: sqlite3.Cursor = Depends(get_db)
):
    """
    Get news articles filtered by country and date range.
    Now optimized for concurrent access while scheduler is running.

    Args:
        country: Country code to filter by (default: "DE")
        from_: Start date in ISO format (default: "2025-07-01")
        to_: End date in ISO format (default: current UTC date, computed per request)
        db: Database cursor dependency

    Returns:
        List of news articles matching the criteria

    Raises:
        HTTPException: 400 for malformed dates, 500 for database failures
    """
    # Compute the default end date at request time. A default expression in the
    # signature is evaluated only once at import, so it would go stale on a
    # long-running server.
    if to_ is None:
        to_ = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    try:
        # Parse once and reuse; fromisoformat raises ValueError on bad input,
        # which doubles as validation (previously each date was parsed twice).
        from_ts = int(datetime.fromisoformat(from_).timestamp())
        to_ts = int(datetime.fromisoformat(to_).timestamp())

        db.execute(
            """
            SELECT id, title, description, url, published, country, created_at
            FROM news
            WHERE country = ?
              AND published BETWEEN ? AND ?
            ORDER BY published DESC LIMIT 1000
            """,
            (country, from_ts, to_ts)
        )

        return [dict(row) for row in db.fetchall()]

    except ValueError:
        raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
    except Exception as e:
        logger.error(f"❌ Error fetching news: {e}")
        raise HTTPException(
            500, "Internal server error while fetching news"
        )
@app.get("/feeds", response_model=List[Dict[str, Any]])
async def list_feeds(db: sqlite3.Cursor = Depends(get_db)):
    """
    List all registered news feeds.

    Args:
        db: Database cursor dependency

    Returns:
        List of feed objects with id, country, and url

    Raises:
        HTTPException: 500 on database errors
    """
    try:
        # cursor.execute returns the cursor itself, so the calls chain.
        records = db.execute("SELECT * FROM feeds ORDER BY country, url").fetchall()
        return [dict(record) for record in records]
    except Exception as exc:
        logger.error(f"❌ Error fetching feeds: {exc}")
        raise HTTPException(
            500, "Internal server error while fetching feeds"
        )
@app.get("/meta/last-sync", response_model=TimestampResponse)
async def get_last_sync(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the timestamp of the last successful feed synchronization.

    Args:
        db: Database cursor dependency

    Returns:
        Object containing the timestamp as a Unix epoch
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()
    if row is None:
        # No sync recorded yet: fall back to "now" so the response schema holds.
        # Uses the already-imported datetime instead of a function-scope
        # `import time`. NOTE(review): reporting the current time as the last
        # sync may mislead clients — confirm the frontend contract.
        return {"ts": int(datetime.now(timezone.utc).timestamp())}
    return {"ts": int(row["val"])}
@app.post("/feeds", response_model=SuccessResponse)
async def add_feed(
    feed: FeedData,
    db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Add a new news feed.

    Args:
        feed: Feed data with country and URL
        db: Database cursor dependency

    Returns:
        Success status ("added" is reported even when the URL already
        existed, because the insert is ON CONFLICT DO NOTHING)

    Raises:
        HTTPException: 500 on database errors
    """
    try:
        # Duplicate URLs are silently ignored via the unique constraint.
        db.execute(
            "INSERT INTO feeds (country, url) VALUES (?, ?) "
            "ON CONFLICT (url) DO NOTHING",
            (feed.country, feed.url)
        )
    except Exception as exc:
        logger.error(f"❌ Error adding feed: {exc}")
        raise HTTPException(
            500, "Internal server error while adding feed"
        )
    return {"status": "added"}
@app.delete("/feeds", response_model=SuccessResponse)
async def delete_feed(
    url: str,
    db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Delete a news feed by URL.

    Args:
        url: URL of the feed to delete
        db: Database cursor dependency

    Returns:
        Success status ("deleted" even if no row matched)

    Raises:
        HTTPException: 500 on database errors
    """
    try:
        db.execute("DELETE FROM feeds WHERE url=?", (url,))
    except Exception as exc:
        logger.error(f"❌ Error deleting feed: {exc}")
        raise HTTPException(
            500, "Internal server error while deleting feed"
        )
    return {"status": "deleted"}
@app.get("/model/status", response_model=Union[ModelStatus, ErrorResponse])
async def get_model_status():
    """
    Check the status of the LLM model.

    Returns:
        Object containing model name, status, and available models,
        or an error response if the model service is unavailable
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{OLLAMA_HOST}/api/tags",
                timeout=OLLAMA_API_TIMEOUT_SECONDS
            )
            response.raise_for_status()

        # Ollama's /api/tags returns {"models": [{"name": ...}, ...]}.
        available = [entry.get("name") for entry in response.json().get("models", [])]

        return {
            "name": LLM_MODEL,
            "status": "ready" if LLM_MODEL in available else "not available",
            "available_models": available,
        }
    except Exception as exc:
        # Any failure (network, HTTP error, bad JSON) collapses to ErrorResponse.
        return {"status": "error", "message": str(exc)}
@app.post("/sync", response_model=None)
async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
    """
    Manually trigger a feed synchronization.

    Args:
        db: Database cursor dependency

    Returns:
        Success status, or a 429 response if sync was triggered too recently

    Raises:
        HTTPException: 500 if the background task could not be scheduled
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()

    # A missing row means no sync has ever run: skip the cooldown check
    # instead of crashing on int(None["val"]).
    if row is not None:
        last_sync_time = datetime.fromtimestamp(int(row["val"]), timezone.utc)
        if datetime.now(timezone.utc) - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
            return Response(
                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
                content="Sync was triggered too recently. Please wait before triggering again."
            )

    try:
        # asyncio only keeps a weak reference to tasks; hold a strong one in a
        # set (stored on the function) so the harvest cannot be garbage-
        # collected mid-run, and drop it once the task completes.
        background = getattr(manual_sync, "_background_tasks", None)
        if background is None:
            background = manual_sync._background_tasks = set()
        task = asyncio.create_task(NewsFetcher.harvest_feeds())
        background.add(task)
        task.add_done_callback(background.discard)
        return {"status": "triggered", "task_id": id(task)}
    except Exception as e:
        raise HTTPException(
            500, f"Failed to trigger sync: {str(e)}"
        )
@app.get("/settings/cron", response_model=HoursResponse)
async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the current cron schedule for harvesting news.

    Args:
        db: Database cursor dependency

    Returns:
        Object containing the current hours setting
    """
    db.execute("SELECT val FROM settings WHERE key='cron_hours'")
    record = db.fetchone()

    # Fall back to the configured default when the setting row is missing.
    if record is None:
        return {"hours": CRON_HOURS}

    # A corrupt/non-numeric stored value also falls back to the default.
    try:
        return {"hours": float(record["val"])}
    except (ValueError, TypeError):
        return {"hours": CRON_HOURS}
@app.post("/settings/cron", response_model=HoursResponse)
async def update_cron_schedule(data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)):
    """
    Update the cron schedule for harvesting news.

    Args:
        data: New cron settings with hours interval
        db: Database cursor dependency

    Returns:
        Object containing the updated (clamped) hours setting
    """
    # Clamp to the configured floor so the scheduler can never be hammered.
    hours = max(MIN_CRON_HOURS, data.hours)

    # get_job returns None if the job was removed; guard instead of crashing
    # with AttributeError.
    job = scheduler.get_job("harvest")
    if job is not None:
        job.modify(trigger=IntervalTrigger(hours=hours))

    # The CRON_HOURS environment variable takes precedence over the DB value,
    # so only persist when it is unset.
    if os.getenv("CRON_HOURS") is None:
        db.execute(
            "UPDATE settings SET val=? WHERE key='cron_hours'",
            (str(hours),)
        )
        # A plain UPDATE silently does nothing when the row is absent (and
        # get_cron_schedule shows it can be) — insert it in that case.
        if db.rowcount == 0:
            db.execute(
                "INSERT INTO settings (key, val) VALUES ('cron_hours', ?)",
                (str(hours),)
            )

    # Rebinds this module's name only; other modules that imported CRON_HOURS
    # from config still see the old value.
    global CRON_HOURS
    CRON_HOURS = hours

    return {"hours": hours}
# Mount the built frontend at the root path. html=True serves index.html as
# the default document. Registered last so API routes take precedence.
app.mount("/", StaticFiles(directory=frontend_path, html=True), name="static")