"""
Owly News Summariser Backend

This module provides a FastAPI application that serves as the backend for the Owly News Summariser.
It handles fetching news from RSS feeds, summarizing articles using Ollama/qwen, and providing
an API for the frontend to access the summarized news.

The application uses SQLite for data storage and APScheduler for scheduling periodic news harvesting.
"""
# Standard library imports
import asyncio
import os
import sqlite3
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Union

# Third-party imports
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from fastapi import Depends, FastAPI, HTTPException, Response, status
from fastapi.staticfiles import StaticFiles
from starlette.responses import JSONResponse

from backend.app.config import (
    CRON_HOURS,
    LLM_MODEL,
    MIN_CRON_HOURS,
    OLLAMA_API_TIMEOUT_SECONDS,
    OLLAMA_HOST,
    SYNC_COOLDOWN_MINUTES,
    frontend_path,
    logger,
)
from backend.app.database import get_db, get_db_write
from backend.app.models import (
    CronSettings,
    ErrorResponse,
    FeedData,
    HoursResponse,
    ModelStatus,
    SuccessResponse,
    TimestampResponse,
)
from backend.app.services import NewsFetcher
# FastAPI application instance exposing the summariser API.
app = FastAPI(
    title="Owly News Summariser",
    description="API for the Owly News Summariser application",
    version="1.0.0"
)

# Background scheduler for periodic feed harvesting.
# Runs in UTC so the interval is unaffected by local DST changes; the job
# must be registered before start() so the first interval is scheduled.
scheduler = AsyncIOScheduler(timezone="UTC")
scheduler.add_job(
    NewsFetcher.harvest_feeds,
    "interval",
    hours=CRON_HOURS,
    id="harvest"
)
scheduler.start()
|
|
|
|
|
|
# API endpoints
|
|
@app.get("/news", response_model=List[Dict[str, Any]])
async def get_news(
    country: str = "DE",
    from_: Union[str, None] = None,
    to_: Union[str, None] = None,
    timezone_name: str = "UTC",
    all_countries: bool = False,
    all_dates: bool = False,
    db: sqlite3.Cursor = Depends(get_db)
):
    """
    Get news articles filtered by country and date range.

    Handles the client timezone and supports multiple countries as well as
    unfiltered ("all") queries.

    Args:
        country: Country code(s) to filter by (default: "DE").
            For multiple countries, use comma-separated values like "DE,US,GB"
        from_: Start date in ISO format (optional)
        to_: End date in ISO format (optional)
        timezone_name: Client timezone for date interpretation (default: "UTC")
        all_countries: If True, returns news from all countries (overrides country param)
        all_dates: If True, returns news from all dates (overrides date params)
        db: Database cursor dependency

    Returns:
        List of news articles matching the criteria (at most 1000, newest first)

    Raises:
        HTTPException: 400 on unparseable dates, 500 on any other failure
    """
    try:
        from zoneinfo import ZoneInfo

        # Resolve the client timezone; fall back to UTC on invalid input
        # rather than failing the whole request.
        try:
            client_tz = ZoneInfo(timezone_name)
        except Exception:
            logger.warning(f"Invalid timezone '{timezone_name}', using UTC")
            client_tz = timezone.utc

        where_conditions = []
        params = []

        if not all_countries:
            countries = [c.strip().upper() for c in country.split(',') if c.strip()]
            if len(countries) == 1:
                where_conditions.append("country = ?")
                params.append(countries[0])
            elif len(countries) > 1:
                placeholders = ','.join('?' for _ in countries)
                where_conditions.append(f"country IN ({placeholders})")
                params.extend(countries)

        if not all_dates and (from_ or to_):
            if not from_:
                from_ = "2025-01-01"  # Default start date
            if not to_:
                to_ = datetime.now(timezone.utc).strftime("%Y-%m-%d")  # Default to today

            # Interpret the naive ISO dates in the client's timezone, then
            # extend the end date to the last second of that day so the
            # whole day is included.
            from_date = datetime.fromisoformat(from_).replace(tzinfo=client_tz)
            to_date = datetime.fromisoformat(to_).replace(tzinfo=client_tz)
            to_date = to_date.replace(hour=23, minute=59, second=59)

            from_ts = int(from_date.timestamp())
            to_ts = int(to_date.timestamp())

            where_conditions.append("published BETWEEN ? AND ?")
            params.extend([from_ts, to_ts])

            logger.info(f"Date range: {from_date} to {to_date} (UTC timestamps: {from_ts} to {to_ts})")

        # Build the complete SQL query (the original triple-quoted literal
        # carried a stray line-continuation backslash in the SQL text).
        sql = "SELECT id, title, summary, url, published, country, created_at FROM news"
        if where_conditions:
            sql += " WHERE " + " AND ".join(where_conditions)
        sql += " ORDER BY published DESC LIMIT 1000"

        # Log which filter combination is in effect
        if all_countries and all_dates:
            logger.info("Querying ALL news articles (no filters)")
        elif all_countries:
            logger.info("Querying news from ALL countries with date filter")
        elif all_dates:
            logger.info(f"Querying ALL dates for countries: {country}")
        else:
            logger.info(f"Querying news: countries={country}, timezone={timezone_name}")

        logger.info(f"SQL: {sql}")
        logger.info(f"Parameters: {params}")

        db.execute(sql, params)
        result = [dict(row) for row in db.fetchall()]

        logger.info(f"Found {len(result)} news articles")
        return result

    except ValueError as e:
        # Raised by datetime.fromisoformat on malformed input.
        logger.error(f"Date parsing error: {e}")
        raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
    except Exception as e:
        logger.error(f"❌ Error fetching news: {e}")
        raise HTTPException(500, "Internal server error while fetching news")
|
|
|
|
|
|
@app.get("/feeds", response_model=List[Dict[str, Any]])
async def list_feeds(db: sqlite3.Cursor = Depends(get_db)):
    """
    List all registered news feeds.

    Args:
        db: Database cursor dependency

    Returns:
        List of feed objects with id, country, and url

    Raises:
        HTTPException: 500 on any database failure
    """
    try:
        db.execute("SELECT * FROM feeds ORDER BY country, url")
        # Return plain dicts (consistent with /news) so the declared
        # response_model is applied, instead of bypassing it with a
        # prebuilt JSONResponse.
        return [dict(row) for row in db.fetchall()]
    except Exception as e:
        logger.error(f"❌ Error fetching feeds: {e}")
        raise HTTPException(
            500, "Internal server error while fetching feeds"
        )
|
|
|
|
|
|
|
|
|
|
@app.post("/feeds", response_model=SuccessResponse)
async def add_feed(
    feed: FeedData,
    db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Register a new news feed.

    Duplicate URLs are ignored via ON CONFLICT DO NOTHING, so re-adding an
    existing feed is a no-op that still reports success.

    Args:
        feed: Feed data with country and URL
        db: Database cursor dependency

    Returns:
        Success status
    """
    insert_sql = (
        "INSERT INTO feeds (country, url) VALUES (?, ?) "
        "ON CONFLICT (url) DO NOTHING"
    )
    try:
        db.execute(insert_sql, (feed.country, feed.url))
        return {"status": "added"}
    except Exception as e:
        logger.error(f"❌ Error adding feed: {e}")
        raise HTTPException(
            500, "Internal server error while adding feed"
        )
|
|
|
|
|
|
@app.delete("/feeds", response_model=SuccessResponse)
async def delete_feed(
    url: str,
    db: sqlite3.Cursor = Depends(get_db_write)
):
    """
    Delete a news feed by URL.

    Deleting a URL that is not registered is a no-op that still reports
    success.

    Args:
        url: URL of the feed to delete
        db: Database cursor dependency

    Returns:
        Success status
    """
    try:
        db.execute("DELETE FROM feeds WHERE url=?", (url,))
        return {"status": "deleted"}
    except Exception as e:
        logger.error(f"❌ Error deleting feed: {e}")
        raise HTTPException(
            500, "Internal server error while deleting feed"
        )
|
|
|
|
|
|
@app.get("/model/status", response_model=Union[ModelStatus, ErrorResponse])
async def get_model_status():
    """
    Check the status of the LLM model.

    Queries the Ollama /api/tags endpoint for the list of installed models
    and reports whether the configured LLM_MODEL is among them.

    Returns:
        Object containing model name, status, and available models,
        or an error response if the model service is unavailable
    """
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{OLLAMA_HOST}/api/tags",
                timeout=OLLAMA_API_TIMEOUT_SECONDS
            )
            response.raise_for_status()

        available_names = [
            entry.get("name") for entry in response.json().get("models", [])
        ]
        status_text = "ready" if LLM_MODEL in available_names else "not available"

        return {
            "name": LLM_MODEL,
            "status": status_text,
            "available_models": available_names
        }
    except Exception as e:
        # Any failure (connection, HTTP status, bad JSON) is reported as an
        # error payload rather than a 500, so the frontend can display it.
        return {"status": "error", "message": str(e)}
|
|
|
|
|
|
# Strong references to in-flight harvest tasks: asyncio keeps only weak
# references to running tasks, so without this a fire-and-forget task could
# be garbage-collected mid-run and its exception silently lost.
_harvest_tasks = set()


def _on_harvest_done(task):
    """Drop our reference to a finished harvest task and log any failure."""
    _harvest_tasks.discard(task)
    try:
        task.result()
    except asyncio.CancelledError:
        pass
    except Exception as e:
        logger.error(f"❌ Background harvest failed: {e}")


@app.post("/sync", response_model=None)
async def manual_sync(db: sqlite3.Cursor = Depends(get_db_write)):
    """
    Manually trigger a feed synchronization.

    A cooldown of SYNC_COOLDOWN_MINUTES prevents repeated triggers; within
    that window a 429 response is returned instead of starting a new sync.

    Args:
        db: Database cursor dependency (write access: updates meta.last_sync)

    Returns:
        Success status with a task id, or a 429 response if sync was
        triggered too recently

    Raises:
        HTTPException: 500 if the background task could not be started
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()

    if row is None:
        # No record yet: seed last_sync far enough in the past that the
        # cooldown check below passes and this first sync is allowed.
        # (time is imported at module level; no local re-import needed.)
        last_sync_ts = int(time.time()) - (SYNC_COOLDOWN_MINUTES * 60 + 1)
        db.execute("INSERT INTO meta (key, val) VALUES ('last_sync', ?)", (str(last_sync_ts),))
    else:
        last_sync_ts = int(row["val"])

    now = datetime.now(timezone.utc)
    last_sync_time = datetime.fromtimestamp(last_sync_ts, timezone.utc)

    if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
        return Response(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            content="Sync was triggered too recently. Please wait before triggering again.")

    try:
        task = asyncio.create_task(NewsFetcher.harvest_feeds())
        _harvest_tasks.add(task)
        task.add_done_callback(_on_harvest_done)
        # Record the trigger time immediately so the cooldown applies even
        # while the harvest is still running.
        current_ts = int(time.time())
        db.execute("UPDATE meta SET val=? WHERE key='last_sync'", (str(current_ts),))
        return {"status": "triggered", "task_id": id(task)}
    except Exception as e:
        logger.error(f"❌ Failed to trigger sync: {e}")
        raise HTTPException(
            500, f"Failed to trigger sync: {str(e)}"
        )
|
|
|
|
|
|
|
|
@app.get("/meta/last-sync", response_model=TimestampResponse)
async def get_last_sync(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the timestamp of the last successful feed synchronization.

    Args:
        db: Database cursor dependency

    Returns:
        Object containing the timestamp as a Unix epoch; falls back to the
        current time when no sync has been recorded yet
    """
    db.execute("SELECT val FROM meta WHERE key='last_sync'")
    row = db.fetchone()
    if row is None:
        # No sync recorded yet: report "now" so clients show a sane value.
        # (time is imported at module level; the previous function-local
        # re-import was redundant.)
        return {"ts": int(time.time())}
    return {"ts": int(row["val"])}
|
|
|
|
|
|
@app.get("/settings/cron", response_model=HoursResponse)
async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
    """
    Get the current cron schedule for harvesting news.

    Args:
        db: Database cursor dependency

    Returns:
        Object containing the current hours setting; the configured
        CRON_HOURS default is used when the stored value is missing or
        not a valid number
    """
    db.execute("SELECT val FROM settings WHERE key='cron_hours'")
    row = db.fetchone()

    if row is not None:
        try:
            return {"hours": float(row["val"])}
        except (ValueError, TypeError):
            # Stored value is unparseable; fall through to the default.
            pass

    return {"hours": CRON_HOURS}
|
|
|
|
|
|
@app.post("/settings/cron", response_model=HoursResponse)
async def update_cron_schedule(
    data: CronSettings,
    db: sqlite3.Cursor = Depends(get_db_write)):
    """
    Update the cron schedule for harvesting news.

    Args:
        data: New cron settings with hours interval
        db: Database cursor dependency

    Returns:
        Object containing the updated hours setting

    Raises:
        HTTPException: 500 if the harvest job is not registered
    """
    # Clamp to the configured minimum interval.
    hours = max(MIN_CRON_HOURS, data.hours)

    job = scheduler.get_job("harvest")
    if job is None:
        # Previously a missing job raised a bare AttributeError; fail
        # explicitly with a logged 500 instead.
        logger.error("❌ Harvest job not found in scheduler")
        raise HTTPException(500, "Harvest job is not registered")
    job.modify(trigger=IntervalTrigger(hours=hours))

    # Persist only when the interval is not overridden by the CRON_HOURS
    # environment variable.
    if os.getenv("CRON_HOURS") is None:
        db.execute(
            "UPDATE settings SET val=? WHERE key='cron_hours'",
            (str(hours),)
        )
        if db.rowcount == 0:
            # The settings row did not exist yet: a plain UPDATE would have
            # silently persisted nothing.
            db.execute(
                "INSERT INTO settings (key, val) VALUES ('cron_hours', ?)",
                (str(hours),)
            )

    # NOTE(review): this rebinds only this module's CRON_HOURS alias (used
    # as the fallback in get_cron_schedule); it does not change
    # backend.app.config.CRON_HOURS.
    global CRON_HOURS
    CRON_HOURS = hours

    return {"hours": hours}
|
|
|
|
|
|
# Mount static frontend at the root path. html=True serves index.html for
# "/"; this mount is registered last so the API routes above take priority.
app.mount("/", StaticFiles(directory=frontend_path, html=True), name="static")