refactor: apply consistent formatting and improve code readability across backend modules

2025-08-01 22:51:38 +02:00
parent 0fd2c7a8b6
commit e1f51794af
5 changed files with 147 additions and 65 deletions

View File

@@ -1,6 +1,6 @@
from pathlib import Path
import os
import logging
import os
from pathlib import Path
DB_PATH = Path(os.getenv("DB_NAME", "owlynews.sqlite3"))
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
@@ -42,7 +42,8 @@ def update_constants_from_db(settings_dict):
if 'ollama_host' in settings_dict and os.getenv("OLLAMA_HOST") is None:
OLLAMA_HOST = settings_dict['ollama_host']
if 'min_cron_hours' in settings_dict and os.getenv("MIN_CRON_HOURS") is None:
if 'min_cron_hours' in settings_dict and os.getenv(
"MIN_CRON_HOURS") is None:
try:
MIN_CRON_HOURS = float(settings_dict['min_cron_hours'])
except (ValueError, TypeError):
@@ -61,7 +62,8 @@ def update_constants_from_db(settings_dict):
f"{settings_dict['cron_hours']}"
)
if 'sync_cooldown_minutes' in settings_dict and os.getenv("SYNC_COOLDOWN_MINUTES") is None:
if 'sync_cooldown_minutes' in settings_dict and os.getenv(
"SYNC_COOLDOWN_MINUTES") is None:
try:
SYNC_COOLDOWN_MINUTES = int(settings_dict['sync_cooldown_minutes'])
except (ValueError, TypeError):
@@ -73,7 +75,8 @@ def update_constants_from_db(settings_dict):
if 'llm_model' in settings_dict and os.getenv("LLM_MODEL") is None:
LLM_MODEL = settings_dict['llm_model']
if 'llm_timeout_seconds' in settings_dict and os.getenv("LLM_TIMEOUT_SECONDS") is None:
if 'llm_timeout_seconds' in settings_dict and os.getenv(
"LLM_TIMEOUT_SECONDS") is None:
try:
LLM_TIMEOUT_SECONDS = int(settings_dict['llm_timeout_seconds'])
except (ValueError, TypeError):
@@ -82,16 +85,19 @@ def update_constants_from_db(settings_dict):
f"{settings_dict['llm_timeout_seconds']}"
)
if 'ollama_api_timeout_seconds' in settings_dict and os.getenv("OLLAMA_API_TIMEOUT_SECONDS") is None:
if 'ollama_api_timeout_seconds' in settings_dict and os.getenv(
"OLLAMA_API_TIMEOUT_SECONDS") is None:
try:
OLLAMA_API_TIMEOUT_SECONDS = int(settings_dict['ollama_api_timeout_seconds'])
OLLAMA_API_TIMEOUT_SECONDS = int(
settings_dict['ollama_api_timeout_seconds'])
except (ValueError, TypeError):
logger.warning(
f"⚠️ Invalid ollama_api_timeout_seconds value in DB: "
f"{settings_dict['ollama_api_timeout_seconds']}"
)
if 'article_fetch_timeout' in settings_dict and os.getenv("ARTICLE_FETCH_TIMEOUT") is None:
if 'article_fetch_timeout' in settings_dict and os.getenv(
"ARTICLE_FETCH_TIMEOUT") is None:
try:
ARTICLE_FETCH_TIMEOUT = int(settings_dict['article_fetch_timeout'])
except (ValueError, TypeError):
@@ -100,7 +106,8 @@ def update_constants_from_db(settings_dict):
f"{settings_dict['article_fetch_timeout']}"
)
if 'max_article_length' in settings_dict and os.getenv("MAX_ARTICLE_LENGTH") is None:
if 'max_article_length' in settings_dict and os.getenv(
"MAX_ARTICLE_LENGTH") is None:
try:
MAX_ARTICLE_LENGTH = int(settings_dict['max_article_length'])
except (ValueError, TypeError):
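
For orientation, the config.py hunks above all rewrap the same env-first override pattern: a value stored in the settings table is applied only when the matching environment variable is unset, with a guarded cast and a warning on bad values. Below is a minimal sketch of one such branch, not part of the diff; the default value and the `global` declaration are assumptions, the names follow the hunks.

    import logging
    import os

    logger = logging.getLogger(__name__)

    # Assumed default; the real config.py defines this constant near the top of the module.
    LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", "120"))


    def update_constants_from_db(settings_dict: dict) -> None:
        """Apply DB-stored settings unless an environment variable already overrides them."""
        global LLM_TIMEOUT_SECONDS  # assumed: the module mutates its own constants
        if 'llm_timeout_seconds' in settings_dict and os.getenv("LLM_TIMEOUT_SECONDS") is None:
            try:
                LLM_TIMEOUT_SECONDS = int(settings_dict['llm_timeout_seconds'])
            except (ValueError, TypeError):
                logger.warning(
                    f"⚠️ Invalid llm_timeout_seconds value in DB: "
                    f"{settings_dict['llm_timeout_seconds']}"
                )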

View File

@@ -1,11 +1,24 @@
import json
import sqlite3
import time
from contextlib import contextmanager
from pathlib import Path
import sqlite3
from typing import Iterator
from backend.app.config import logger, DB_PATH, update_constants_from_db, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
SYNC_COOLDOWN_MINUTES, LLM_MODEL, LLM_TIMEOUT_SECONDS, OLLAMA_API_TIMEOUT_SECONDS, ARTICLE_FETCH_TIMEOUT, \
MAX_ARTICLE_LENGTH
from backend.app.config import (
ARTICLE_FETCH_TIMEOUT,
CRON_HOURS,
DB_PATH,
LLM_MODEL,
LLM_TIMEOUT_SECONDS,
MAX_ARTICLE_LENGTH,
MIN_CRON_HOURS,
OLLAMA_API_TIMEOUT_SECONDS,
OLLAMA_HOST,
SYNC_COOLDOWN_MINUTES,
logger,
update_constants_from_db,
)
class DatabaseManager:
@@ -41,7 +54,8 @@ class DatabaseManager:
schema_sql = f.read()
with self.get_cursor() as cursor:
statements = [stmt.strip() for stmt in schema_sql.split(';') if stmt.strip()]
statements = [stmt.strip()
for stmt in schema_sql.split(';') if stmt.strip()]
for statement in statements:
cursor.execute(statement)
@@ -85,14 +99,12 @@ class DatabaseManager:
"""
Seed initial feeds from seed_feeds.json file.
"""
import json
from pathlib import Path
try:
seed_file = Path(__file__).parent / "seed_feeds.json"
if not seed_file.exists():
logger.warning("⚠️ seed_feeds.json not found, skipping feed seeding")
logger.warning(
"⚠️ seed_feeds.json not found, skipping feed seeding")
return
with open(seed_file, 'r', encoding='utf-8') as f:
@@ -101,10 +113,7 @@ class DatabaseManager:
for country, urls in feeds_data.items():
for url in urls:
cursor.execute(
"INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)",
(country, url)
)
"INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)", (country, url))
except Exception as e:
logger.error(f"❌ Failed to seed feeds: {e}")
@@ -182,8 +191,7 @@ class DatabaseManager:
conn.rollback()
if "database is locked" in str(e).lower():
logger.warning(
f"⚠️ Database temporarily locked, operation may need retry: {e}"
)
f"⚠️ Database temporarily locked, operation may need retry: {e}")
raise e
except Exception as e:
if conn:
@@ -194,7 +202,9 @@ class DatabaseManager:
conn.close()
@contextmanager
def get_cursor_with_retry(self, readonly: bool = False, max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
def get_cursor_with_retry(self,
readonly: bool = False,
max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
"""
Context manager with retry logic for database operations.
@@ -211,13 +221,13 @@ class DatabaseManager:
yield cursor
return
except sqlite3.OperationalError as e:
if "database is locked" in str(e).lower() and attempt < max_retries:
if "database is locked" in str(
e).lower() and attempt < max_retries:
wait_time = (attempt + 1) * 0.1
logger.warning(
f"⚠️ Database locked, retrying in {wait_time}s "
f"(attempt {attempt + 1}/{max_retries + 1})"
)
import time
time.sleep(wait_time)
continue
raise e
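
Pieced together, the database.py hunks show get_cursor_with_retry retrying lock errors with a linear backoff (0.1s, 0.2s, 0.3s). Below is a self-contained sketch of the same idea, not the project's implementation: it retries while acquiring the connection and, for writers, takes the write lock up front with BEGIN IMMEDIATE so contention surfaces inside the retry loop. The db_path parameter is a plain file path here rather than the manager's DB_PATH.

    import sqlite3
    import time
    from contextlib import contextmanager
    from typing import Iterator


    @contextmanager
    def cursor_with_retry(db_path: str,
                          readonly: bool = False,
                          max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
        """Yield a cursor, retrying with linear backoff while SQLite reports 'database is locked'."""
        conn = None
        for attempt in range(max_retries + 1):
            try:
                conn = sqlite3.connect(db_path, timeout=0.5)
                if not readonly:
                    # Acquire the write lock immediately so a locked DB fails here, where we can retry.
                    conn.execute("BEGIN IMMEDIATE")
                break
            except sqlite3.OperationalError as e:
                if conn is not None:
                    conn.close()
                if "database is locked" in str(e).lower() and attempt < max_retries:
                    time.sleep((attempt + 1) * 0.1)  # 0.1s, 0.2s, 0.3s, matching the diff's wait_time
                    continue
                raise
        try:
            yield conn.cursor()
            if not readonly:
                conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            conn.close()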

View File

@@ -12,22 +12,37 @@ The application uses SQLite for data storage and APScheduler for scheduling peri
import asyncio
import os
import sqlite3
import time
from datetime import datetime, timedelta, timezone
from http.client import HTTPException
from typing import Any, Dict, List, Union
# Third-party imports
import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from fastapi import Depends, FastAPI, Response, status
from fastapi import Depends, FastAPI, HTTPException, Response, status
from fastapi.staticfiles import StaticFiles
from backend.app.config import logger, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
SYNC_COOLDOWN_MINUTES, LLM_MODEL, OLLAMA_API_TIMEOUT_SECONDS, frontend_path
from backend.app.config import (
CRON_HOURS,
LLM_MODEL,
MIN_CRON_HOURS,
OLLAMA_API_TIMEOUT_SECONDS,
OLLAMA_HOST,
SYNC_COOLDOWN_MINUTES,
frontend_path,
logger,
)
from backend.app.database import get_db, get_db_write
from backend.app.models import TimestampResponse, SuccessResponse, FeedData, ModelStatus, ErrorResponse, HoursResponse, \
CronSettings
from backend.app.models import (
CronSettings,
ErrorResponse,
FeedData,
HoursResponse,
ModelStatus,
SuccessResponse,
TimestampResponse,
)
from backend.app.services import NewsFetcher
app = FastAPI(
@@ -88,7 +103,8 @@ async def get_news(
return [dict(row) for row in db.fetchall()]
except ValueError:
raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
raise HTTPException(
400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
except Exception as e:
logger.error(f"❌ Error fetching news: {e}")
raise HTTPException(
@@ -244,8 +260,7 @@ async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
return Response(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
content="Sync was triggered too recently. Please wait before triggering again."
)
content="Sync was triggered too recently. Please wait before triggering again.")
try:
task = asyncio.create_task(NewsFetcher.harvest_feeds())
@@ -281,7 +296,9 @@ async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
@app.post("/settings/cron", response_model=HoursResponse)
async def update_cron_schedule(data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)):
async def update_cron_schedule(
data: CronSettings,
db: sqlite3.Cursor = Depends(get_db_write)):
"""
Update the cron schedule for harvesting news.
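
From the manual_sync hunk above: a new harvest is refused with 429 while the previous trigger is within SYNC_COOLDOWN_MINUTES, otherwise NewsFetcher.harvest_feeds() is started as a background task. A reduced sketch of that guard, not the project's handler; the route path, the 202 reply, the in-memory last_sync_time, and the default cooldown are assumptions (the real endpoint reads its state through the DB dependency).

    import asyncio
    from datetime import datetime, timedelta, timezone

    from fastapi import FastAPI, Response, status

    app = FastAPI()
    SYNC_COOLDOWN_MINUTES = 10  # assumed default; the project loads this from config/DB
    last_sync_time = datetime.min.replace(tzinfo=timezone.utc)  # stand-in for the DB-stored timestamp


    async def harvest_feeds() -> None:
        """Stand-in for NewsFetcher.harvest_feeds()."""


    @app.post("/sync")  # route path assumed
    async def manual_sync() -> Response:
        global last_sync_time
        now = datetime.now(timezone.utc)
        if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
            return Response(
                status_code=status.HTTP_429_TOO_MANY_REQUESTS,
                content="Sync was triggered too recently. Please wait before triggering again.")
        last_sync_time = now
        asyncio.create_task(harvest_feeds())  # fire-and-forget, as in the diff
        return Response(status_code=status.HTTP_202_ACCEPTED)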

View File

@@ -1,4 +1,4 @@
from typing import TypedDict, List
from typing import List, TypedDict
from pydantic import BaseModel

View File

@@ -3,14 +3,20 @@ import json
import re
import sqlite3
from datetime import datetime, timezone
from typing import Optional, cast, Dict
from typing import Dict, Optional, cast
import feedparser
import httpx
from bs4 import BeautifulSoup
from backend.app.config import ARTICLE_FETCH_TIMEOUT, MAX_ARTICLE_LENGTH, logger, LLM_MODEL, OLLAMA_HOST, \
LLM_TIMEOUT_SECONDS
from backend.app.config import (
ARTICLE_FETCH_TIMEOUT,
LLM_MODEL,
LLM_TIMEOUT_SECONDS,
MAX_ARTICLE_LENGTH,
OLLAMA_HOST,
logger,
)
from backend.app.database import db_manager
from backend.app.models import ArticleSummary
@@ -22,7 +28,9 @@ class NewsFetcher:
"""
@staticmethod
async def fetch_article_content(client: httpx.AsyncClient, url: str) -> str:
async def fetch_article_content(
client: httpx.AsyncClient,
url: str) -> str:
"""
Fetch and extract the main content from an article URL.
@@ -51,7 +59,14 @@ class NewsFetcher:
soup = BeautifulSoup(response.text, 'html.parser')
for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'form', 'button']):
for element in soup(['script',
'style',
'nav',
'header',
'footer',
'aside',
'form',
'button']):
element.decompose()
content_selectors = [
@@ -84,7 +99,8 @@ class NewsFetcher:
if body:
article_text = body.get_text(separator=' ', strip=True)
article_text = re.sub(r'\s+', ' ', article_text) # Normalize whitespace
article_text = re.sub(
r'\s+', ' ', article_text) # Normalize whitespace
article_text = article_text.strip()
# Limit length to avoid overwhelming the LLM
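
Read together, the fetch_article_content hunks strip non-content tags, extract the text, collapse whitespace, and cap the length before the article reaches the LLM. A condensed sketch of that pipeline, not the project's function; the content_selectors fallback chain is skipped and MAX_ARTICLE_LENGTH is given an assumed value here.

    import re

    import httpx
    from bs4 import BeautifulSoup

    MAX_ARTICLE_LENGTH = 8000  # assumed value; the project takes this from config


    async def fetch_article_text(client: httpx.AsyncClient, url: str) -> str:
        """Return cleaned article text, or '' when the page cannot be fetched."""
        try:
            response = await client.get(url, follow_redirects=True)
            response.raise_for_status()
        except httpx.HTTPError:
            return ""
        soup = BeautifulSoup(response.text, "html.parser")
        # Remove boilerplate elements before extracting text, as in the diff.
        for element in soup(["script", "style", "nav", "header",
                             "footer", "aside", "form", "button"]):
            element.decompose()
        body = soup.body or soup
        text = body.get_text(separator=" ", strip=True)
        text = re.sub(r"\s+", " ", text).strip()  # normalize whitespace
        return text[:MAX_ARTICLE_LENGTH]          # limit length for the LLM prompt
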
@@ -97,14 +113,21 @@ class NewsFetcher:
logger.warning(f"⏰ Timeout fetching article content from: {url}")
return ""
except httpx.HTTPError as e:
logger.warning(f"🌐 HTTP error fetching article content from {url}: {e}")
logger.warning(
f"🌐 HTTP error fetching article content from {url}: {e}")
return ""
except Exception as e:
logger.warning(f"❌ Error fetching article content from {url}: {type(e).__name__}: {e}")
logger.warning(
f"❌ Error fetching article content from {url}: {
type(e).__name__}: {e}")
return ""
@staticmethod
def build_prompt(url: str, title: str = "", description: str = "", content: str = "") -> str:
def build_prompt(
url: str,
title: str = "",
description: str = "",
content: str = "") -> str:
"""
Generate a prompt for the LLM to summarize an article.
@@ -124,10 +147,12 @@ class NewsFetcher:
if description:
context_info.append(f"RSS-Beschreibung: {description}")
if content:
content_preview = content[:500] + "..." if len(content) > 500 else content
content_preview = content[:500] + \
"..." if len(content) > 500 else content
context_info.append(f"Artikel-Inhalt: {content_preview}")
context = "\n".join(context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
context = "\n".join(
context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
return (
"### Aufgabe\n"
@@ -171,9 +196,11 @@ class NewsFetcher:
article_content = await NewsFetcher.fetch_article_content(client, url)
if not article_content:
logger.warning(f"⚠️ Could not fetch article content, using RSS data only")
logger.warning(
f"⚠️ Could not fetch article content, using RSS data only")
prompt = NewsFetcher.build_prompt(url, title, description, article_content)
prompt = NewsFetcher.build_prompt(
url, title, description, article_content)
payload = {
"model": LLM_MODEL,
"prompt": prompt,
@@ -200,7 +227,8 @@ class NewsFetcher:
# Validate required fields
required_fields = ["title", "description"]
missing_fields = [field for field in required_fields if field not in summary_data]
missing_fields = [
field for field in required_fields if field not in summary_data]
if missing_fields:
logger.warning(
@@ -229,7 +257,9 @@ class NewsFetcher:
logger.error(f"❌ HTTP error for {url}: {e}")
return None
except Exception as e:
logger.error(f"❌ Unexpected error summarizing {url}: {type(e).__name__}: {e}")
logger.error(
f"❌ Unexpected error summarizing {url}: {
type(e).__name__}: {e}")
return None
@staticmethod
@@ -264,7 +294,9 @@ class NewsFetcher:
)
except Exception as e:
logger.error(f"❌ Critical error during harvest: {type(e).__name__}: {e}")
logger.error(
f"❌ Critical error during harvest: {
type(e).__name__}: {e}")
raise
@staticmethod
@@ -289,14 +321,20 @@ class NewsFetcher:
feed_data = feedparser.parse(feed_row["url"])
if hasattr(feed_data, 'bozo') and feed_data.bozo:
logger.warning(f"⚠️ Feed has parsing issues: {feed_row['url']}")
logger.warning(
f"⚠️ Feed has parsing issues: {
feed_row['url']}")
if hasattr(feed_data, 'bozo_exception'):
logger.warning(f"⚠️ Feed exception: {feed_data.bozo_exception}")
logger.warning(
f"⚠️ Feed exception: {
feed_data.bozo_exception}")
total_entries = len(feed_data.entries)
if total_entries == 0:
logger.warning(f"⚠️ No entries found in feed: {feed_row['url']}")
logger.warning(
f"⚠️ No entries found in feed: {
feed_row['url']}")
return stats
for i, entry in enumerate(feed_data.entries, 1):
@@ -321,18 +359,23 @@ class NewsFetcher:
stats['skipped'] += 1
continue
# Check if article already exists - use readonly connection for better concurrency
# Check if article already exists - use readonly connection for
# better concurrency
try:
with db_manager.get_cursor_with_retry(readonly=True) as cursor:
cursor.execute("SELECT id FROM news WHERE url = ?", (article_url,))
cursor.execute(
"SELECT id FROM news WHERE url = ?", (article_url,))
if cursor.fetchone():
stats['skipped'] += 1
continue
except Exception as db_error:
logger.warning(f"⚠️ Database check failed for article {i}, continuing: {db_error}")
logger.warning(
f"⚠️ Database check failed for article {i}, continuing: {db_error}")
rss_title = getattr(entry, 'title', '')
rss_description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')
rss_description = getattr(
entry, 'description', '') or getattr(
entry, 'summary', '')
summary = await NewsFetcher.summarize_article(
client,
@@ -342,7 +385,8 @@ class NewsFetcher:
)
if not summary:
logger.warning(f"❌ Failed to get summary for article {i}: {article_url}")
logger.warning(
f"❌ Failed to get summary for article {i}: {article_url}")
stats['failed'] += 1
continue
@@ -353,7 +397,7 @@ class NewsFetcher:
cursor.execute(
"""
INSERT
OR IGNORE INTO news
OR IGNORE INTO news
(title, description, url, published, country)
VALUES (?, ?, ?, ?, ?)
""",
@@ -369,13 +413,17 @@ class NewsFetcher:
stats['successful'] += 1
except Exception as db_error:
logger.error(f"❌ Database error for article {i}: {db_error}")
logger.error(
f"❌ Database error for article {i}: {db_error}")
stats['failed'] += 1
continue
await asyncio.sleep(0.01) # 10ms delay to yield control
except Exception as e:
logger.error(f"❌ Error processing feed {feed_row['url']}: {type(e).__name__}: {e}")
logger.error(
f"❌ Error processing feed {
feed_row['url']}: {
type(e).__name__}: {e}")
return stats
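
The remaining hunks all live in the per-feed harvest loop: parse the feed, warn on parser ("bozo") issues and empty feeds, skip URLs already present in news, summarize the rest, INSERT OR IGNORE the result, and keep per-feed stats. A compressed sketch of that flow, not the project's code; summarize and cursor_factory stand in for NewsFetcher.summarize_article and db_manager.get_cursor_with_retry.

    import asyncio
    import logging

    import feedparser

    logger = logging.getLogger(__name__)


    async def process_feed(feed_url: str, country: str, summarize, cursor_factory) -> dict:
        """Harvest one RSS feed and return counts shaped like the stats dict in the diff."""
        stats = {"successful": 0, "failed": 0, "skipped": 0}
        feed = feedparser.parse(feed_url)
        if getattr(feed, "bozo", False):
            logger.warning(f"⚠️ Feed has parsing issues: {feed_url}")
        if not feed.entries:
            logger.warning(f"⚠️ No entries found in feed: {feed_url}")
            return stats
        for entry in feed.entries:
            url = getattr(entry, "link", "")
            with cursor_factory(readonly=True) as cursor:  # duplicate check, as in the diff
                cursor.execute("SELECT id FROM news WHERE url = ?", (url,))
                if cursor.fetchone():
                    stats["skipped"] += 1
                    continue
            description = getattr(entry, "description", "") or getattr(entry, "summary", "")
            summary = await summarize(url, getattr(entry, "title", ""), description)
            if not summary:
                stats["failed"] += 1
                continue
            with cursor_factory(readonly=False) as cursor:
                cursor.execute(
                    "INSERT OR IGNORE INTO news (title, description, url, published, country) "
                    "VALUES (?, ?, ?, ?, ?)",
                    (summary["title"], summary["description"], url,
                     getattr(entry, "published", ""), country))
            stats["successful"] += 1
            await asyncio.sleep(0.01)  # brief yield between articles, as in the diff
        return stats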