refactor: apply consistent formatting and improve code readability across backend modules

2025-08-01 22:51:38 +02:00
parent 0fd2c7a8b6
commit e1f51794af
5 changed files with 147 additions and 65 deletions

View File: backend/app/config.py

@@ -1,6 +1,6 @@
-from pathlib import Path
-import os
 import logging
+import os
+from pathlib import Path
 
 DB_PATH = Path(os.getenv("DB_NAME", "owlynews.sqlite3"))
 OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
@@ -42,7 +42,8 @@ def update_constants_from_db(settings_dict):
     if 'ollama_host' in settings_dict and os.getenv("OLLAMA_HOST") is None:
         OLLAMA_HOST = settings_dict['ollama_host']
-    if 'min_cron_hours' in settings_dict and os.getenv("MIN_CRON_HOURS") is None:
+    if 'min_cron_hours' in settings_dict and os.getenv(
+            "MIN_CRON_HOURS") is None:
         try:
             MIN_CRON_HOURS = float(settings_dict['min_cron_hours'])
         except (ValueError, TypeError):
@@ -61,7 +62,8 @@ def update_constants_from_db(settings_dict):
                 f"{settings_dict['cron_hours']}"
             )
-    if 'sync_cooldown_minutes' in settings_dict and os.getenv("SYNC_COOLDOWN_MINUTES") is None:
+    if 'sync_cooldown_minutes' in settings_dict and os.getenv(
+            "SYNC_COOLDOWN_MINUTES") is None:
         try:
             SYNC_COOLDOWN_MINUTES = int(settings_dict['sync_cooldown_minutes'])
         except (ValueError, TypeError):
@@ -73,7 +75,8 @@ def update_constants_from_db(settings_dict):
     if 'llm_model' in settings_dict and os.getenv("LLM_MODEL") is None:
         LLM_MODEL = settings_dict['llm_model']
-    if 'llm_timeout_seconds' in settings_dict and os.getenv("LLM_TIMEOUT_SECONDS") is None:
+    if 'llm_timeout_seconds' in settings_dict and os.getenv(
+            "LLM_TIMEOUT_SECONDS") is None:
         try:
             LLM_TIMEOUT_SECONDS = int(settings_dict['llm_timeout_seconds'])
         except (ValueError, TypeError):
@@ -82,16 +85,19 @@ def update_constants_from_db(settings_dict):
                 f"{settings_dict['llm_timeout_seconds']}"
             )
-    if 'ollama_api_timeout_seconds' in settings_dict and os.getenv("OLLAMA_API_TIMEOUT_SECONDS") is None:
+    if 'ollama_api_timeout_seconds' in settings_dict and os.getenv(
+            "OLLAMA_API_TIMEOUT_SECONDS") is None:
         try:
-            OLLAMA_API_TIMEOUT_SECONDS = int(settings_dict['ollama_api_timeout_seconds'])
+            OLLAMA_API_TIMEOUT_SECONDS = int(
+                settings_dict['ollama_api_timeout_seconds'])
         except (ValueError, TypeError):
             logger.warning(
                 f"⚠️ Invalid ollama_api_timeout_seconds value in DB: "
                 f"{settings_dict['ollama_api_timeout_seconds']}"
             )
-    if 'article_fetch_timeout' in settings_dict and os.getenv("ARTICLE_FETCH_TIMEOUT") is None:
+    if 'article_fetch_timeout' in settings_dict and os.getenv(
+            "ARTICLE_FETCH_TIMEOUT") is None:
         try:
             ARTICLE_FETCH_TIMEOUT = int(settings_dict['article_fetch_timeout'])
         except (ValueError, TypeError):
@@ -100,7 +106,8 @@ def update_constants_from_db(settings_dict):
                 f"{settings_dict['article_fetch_timeout']}"
             )
-    if 'max_article_length' in settings_dict and os.getenv("MAX_ARTICLE_LENGTH") is None:
+    if 'max_article_length' in settings_dict and os.getenv(
+            "MAX_ARTICLE_LENGTH") is None:
         try:
             MAX_ARTICLE_LENGTH = int(settings_dict['max_article_length'])
         except (ValueError, TypeError):
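
The hunks above only re-wrap long conditions; the override order of update_constants_from_db is unchanged: an environment variable, if set, always wins over the value stored in the settings table. A minimal sketch of that pattern for a single constant (simplified, not the literal module code; the default value shown is an assumption):

import logging
import os

logger = logging.getLogger(__name__)

# Module-level default; "1.0" here is an assumed fallback, not the project's value.
MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", "1.0"))


def update_constants_from_db(settings_dict):
    """Apply a DB-stored setting unless the corresponding env var is set."""
    global MIN_CRON_HOURS
    if 'min_cron_hours' in settings_dict and os.getenv("MIN_CRON_HOURS") is None:
        try:
            MIN_CRON_HOURS = float(settings_dict['min_cron_hours'])
        except (ValueError, TypeError):
            logger.warning(
                f"⚠️ Invalid min_cron_hours value in DB: "
                f"{settings_dict['min_cron_hours']}"
            )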

View File: backend/app/database.py

@@ -1,11 +1,24 @@
+import json
+import sqlite3
+import time
 from contextlib import contextmanager
 from pathlib import Path
-import sqlite3
 from typing import Iterator
 
-from backend.app.config import logger, DB_PATH, update_constants_from_db, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
-    SYNC_COOLDOWN_MINUTES, LLM_MODEL, LLM_TIMEOUT_SECONDS, OLLAMA_API_TIMEOUT_SECONDS, ARTICLE_FETCH_TIMEOUT, \
-    MAX_ARTICLE_LENGTH
+from backend.app.config import (
+    ARTICLE_FETCH_TIMEOUT,
+    CRON_HOURS,
+    DB_PATH,
+    LLM_MODEL,
+    LLM_TIMEOUT_SECONDS,
+    MAX_ARTICLE_LENGTH,
+    MIN_CRON_HOURS,
+    OLLAMA_API_TIMEOUT_SECONDS,
+    OLLAMA_HOST,
+    SYNC_COOLDOWN_MINUTES,
+    logger,
+    update_constants_from_db,
+)
 
 
 class DatabaseManager:
@@ -41,7 +54,8 @@ class DatabaseManager:
             schema_sql = f.read()
         with self.get_cursor() as cursor:
-            statements = [stmt.strip() for stmt in schema_sql.split(';') if stmt.strip()]
+            statements = [stmt.strip()
+                          for stmt in schema_sql.split(';') if stmt.strip()]
             for statement in statements:
                 cursor.execute(statement)
@@ -85,14 +99,12 @@ class DatabaseManager:
         """
         Seed initial feeds from seed_feeds.json file.
         """
-        import json
-        from pathlib import Path
         try:
             seed_file = Path(__file__).parent / "seed_feeds.json"
             if not seed_file.exists():
-                logger.warning("⚠️ seed_feeds.json not found, skipping feed seeding")
+                logger.warning(
+                    "⚠️ seed_feeds.json not found, skipping feed seeding")
                 return
             with open(seed_file, 'r', encoding='utf-8') as f:
@@ -101,10 +113,7 @@ class DatabaseManager:
                 for country, urls in feeds_data.items():
                     for url in urls:
                         cursor.execute(
-                            "INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)",
-                            (country, url)
-                        )
+                            "INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)", (country, url))
         except Exception as e:
             logger.error(f"❌ Failed to seed feeds: {e}")
@@ -182,8 +191,7 @@ class DatabaseManager:
                 conn.rollback()
             if "database is locked" in str(e).lower():
                 logger.warning(
-                    f"⚠️ Database temporarily locked, operation may need retry: {e}"
-                )
+                    f"⚠️ Database temporarily locked, operation may need retry: {e}")
             raise e
         except Exception as e:
             if conn:
@@ -194,7 +202,9 @@ class DatabaseManager:
                 conn.close()
 
     @contextmanager
-    def get_cursor_with_retry(self, readonly: bool = False, max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
+    def get_cursor_with_retry(self,
+                              readonly: bool = False,
+                              max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
         """
         Context manager with retry logic for database operations.
@@ -211,13 +221,13 @@ class DatabaseManager:
                 yield cursor
                 return
             except sqlite3.OperationalError as e:
-                if "database is locked" in str(e).lower() and attempt < max_retries:
+                if "database is locked" in str(
+                        e).lower() and attempt < max_retries:
                     wait_time = (attempt + 1) * 0.1
                     logger.warning(
                         f"⚠️ Database locked, retrying in {wait_time}s "
                        f"(attempt {attempt + 1}/{max_retries + 1})"
                     )
-                    import time
                     time.sleep(wait_time)
                     continue
                 raise e
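
The retry logic being re-wrapped here is a plain linear backoff on SQLite's "database is locked" error. A self-contained sketch of the same idea, reduced to a single helper function rather than the project's DatabaseManager context manager:

import sqlite3
import time

MAX_RETRIES = 3


def execute_with_retry(conn: sqlite3.Connection, sql: str, params: tuple = ()) -> sqlite3.Cursor:
    """Run a statement, retrying with linear backoff while SQLite reports a lock."""
    for attempt in range(MAX_RETRIES + 1):
        try:
            cur = conn.execute(sql, params)
            conn.commit()
            return cur
        except sqlite3.OperationalError as e:
            if "database is locked" in str(e).lower() and attempt < MAX_RETRIES:
                wait_time = (attempt + 1) * 0.1  # 0.1s, 0.2s, 0.3s: linear backoff
                time.sleep(wait_time)
                continue
            raise


# Usage sketch against an in-memory database:
conn = sqlite3.connect(":memory:")
execute_with_retry(conn, "CREATE TABLE IF NOT EXISTS feeds (country TEXT, url TEXT)")
execute_with_retry(conn, "INSERT INTO feeds (country, url) VALUES (?, ?)", ("de", "https://example.org/rss"))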

View File: FastAPI application module

@@ -12,22 +12,37 @@ The application uses SQLite for data storage and APScheduler for scheduling peri
 import asyncio
 import os
 import sqlite3
+import time
 from datetime import datetime, timedelta, timezone
-from http.client import HTTPException
 from typing import Any, Dict, List, Union
 
 # Third-party imports
 import httpx
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
-from fastapi import Depends, FastAPI, Response, status
+from fastapi import Depends, FastAPI, HTTPException, Response, status
 from fastapi.staticfiles import StaticFiles
 
-from backend.app.config import logger, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
-    SYNC_COOLDOWN_MINUTES, LLM_MODEL, OLLAMA_API_TIMEOUT_SECONDS, frontend_path
+from backend.app.config import (
+    CRON_HOURS,
+    LLM_MODEL,
+    MIN_CRON_HOURS,
+    OLLAMA_API_TIMEOUT_SECONDS,
+    OLLAMA_HOST,
+    SYNC_COOLDOWN_MINUTES,
+    frontend_path,
+    logger,
+)
 from backend.app.database import get_db, get_db_write
-from backend.app.models import TimestampResponse, SuccessResponse, FeedData, ModelStatus, ErrorResponse, HoursResponse, \
-    CronSettings
+from backend.app.models import (
+    CronSettings,
+    ErrorResponse,
+    FeedData,
+    HoursResponse,
+    ModelStatus,
+    SuccessResponse,
+    TimestampResponse,
+)
 from backend.app.services import NewsFetcher
 
 app = FastAPI(
@@ -88,7 +103,8 @@ async def get_news(
         return [dict(row) for row in db.fetchall()]
     except ValueError:
-        raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
+        raise HTTPException(
+            400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
     except Exception as e:
         logger.error(f"❌ Error fetching news: {e}")
         raise HTTPException(
@@ -244,8 +260,7 @@ async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
     if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
         return Response(
             status_code=status.HTTP_429_TOO_MANY_REQUESTS,
-            content="Sync was triggered too recently. Please wait before triggering again."
-        )
+            content="Sync was triggered too recently. Please wait before triggering again.")
 
     try:
         task = asyncio.create_task(NewsFetcher.harvest_feeds())
@@ -281,7 +296,9 @@ async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
 
 @app.post("/settings/cron", response_model=HoursResponse)
-async def update_cron_schedule(data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)):
+async def update_cron_schedule(
+        data: CronSettings,
+        db: sqlite3.Cursor = Depends(get_db_write)):
     """
     Update the cron schedule for harvesting news.
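
The manual_sync change is again only a re-fold of the closing parenthesis; the cooldown guard itself is untouched. A rough standalone sketch of that guard (in-memory timestamp and a hard-coded SYNC_COOLDOWN_MINUTES instead of the project's DB-backed values):

from datetime import datetime, timedelta, timezone

from fastapi import FastAPI, Response, status

SYNC_COOLDOWN_MINUTES = 10  # assumption: the real value comes from config/DB

app = FastAPI()
last_sync_time = datetime.min.replace(tzinfo=timezone.utc)


@app.post("/sync")
async def manual_sync():
    global last_sync_time
    now = datetime.now(timezone.utc)
    if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
        # Same guard as in the commit: refuse re-triggering inside the cooldown window.
        return Response(
            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
            content="Sync was triggered too recently. Please wait before triggering again.")
    last_sync_time = now
    return {"status": "sync started"}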

View File: backend/app/models.py

@@ -1,4 +1,4 @@
-from typing import TypedDict, List
+from typing import List, TypedDict
 
 from pydantic import BaseModel

View File: backend/app/services.py

@@ -3,14 +3,20 @@ import json
 import re
 import sqlite3
 from datetime import datetime, timezone
-from typing import Optional, cast, Dict
+from typing import Dict, Optional, cast
 
 import feedparser
 import httpx
 from bs4 import BeautifulSoup
 
-from backend.app.config import ARTICLE_FETCH_TIMEOUT, MAX_ARTICLE_LENGTH, logger, LLM_MODEL, OLLAMA_HOST, \
-    LLM_TIMEOUT_SECONDS
+from backend.app.config import (
+    ARTICLE_FETCH_TIMEOUT,
+    LLM_MODEL,
+    LLM_TIMEOUT_SECONDS,
+    MAX_ARTICLE_LENGTH,
+    OLLAMA_HOST,
+    logger,
+)
 from backend.app.database import db_manager
 from backend.app.models import ArticleSummary
@@ -22,7 +28,9 @@ class NewsFetcher:
     """
 
     @staticmethod
-    async def fetch_article_content(client: httpx.AsyncClient, url: str) -> str:
+    async def fetch_article_content(
+            client: httpx.AsyncClient,
+            url: str) -> str:
         """
         Fetch and extract the main content from an article URL.
@@ -51,7 +59,14 @@ class NewsFetcher:
             soup = BeautifulSoup(response.text, 'html.parser')
 
-            for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'form', 'button']):
+            for element in soup(['script',
+                                 'style',
+                                 'nav',
+                                 'header',
+                                 'footer',
+                                 'aside',
+                                 'form',
+                                 'button']):
                 element.decompose()
 
             content_selectors = [
@@ -84,7 +99,8 @@ class NewsFetcher:
                 if body:
                     article_text = body.get_text(separator=' ', strip=True)
 
-            article_text = re.sub(r'\s+', ' ', article_text)  # Normalize whitespace
+            article_text = re.sub(
+                r'\s+', ' ', article_text)  # Normalize whitespace
             article_text = article_text.strip()
 
             # Limit length to avoid overwhelming the LLM
@@ -97,14 +113,21 @@ class NewsFetcher:
             logger.warning(f"⏰ Timeout fetching article content from: {url}")
             return ""
         except httpx.HTTPError as e:
-            logger.warning(f"🌐 HTTP error fetching article content from {url}: {e}")
+            logger.warning(
+                f"🌐 HTTP error fetching article content from {url}: {e}")
             return ""
         except Exception as e:
-            logger.warning(f"❌ Error fetching article content from {url}: {type(e).__name__}: {e}")
+            logger.warning(
+                f"❌ Error fetching article content from {url}: {
+                    type(e).__name__}: {e}")
             return ""
 
     @staticmethod
-    def build_prompt(url: str, title: str = "", description: str = "", content: str = "") -> str:
+    def build_prompt(
+            url: str,
+            title: str = "",
+            description: str = "",
+            content: str = "") -> str:
         """
         Generate a prompt for the LLM to summarize an article.
@@ -124,10 +147,12 @@ class NewsFetcher:
         if description:
             context_info.append(f"RSS-Beschreibung: {description}")
         if content:
-            content_preview = content[:500] + "..." if len(content) > 500 else content
+            content_preview = content[:500] + \
+                "..." if len(content) > 500 else content
             context_info.append(f"Artikel-Inhalt: {content_preview}")
 
-        context = "\n".join(context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
+        context = "\n".join(
+            context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
 
         return (
             "### Aufgabe\n"
@@ -171,9 +196,11 @@ class NewsFetcher:
             article_content = await NewsFetcher.fetch_article_content(client, url)
             if not article_content:
-                logger.warning(f"⚠️ Could not fetch article content, using RSS data only")
+                logger.warning(
+                    f"⚠️ Could not fetch article content, using RSS data only")
 
-            prompt = NewsFetcher.build_prompt(url, title, description, article_content)
+            prompt = NewsFetcher.build_prompt(
+                url, title, description, article_content)
             payload = {
                 "model": LLM_MODEL,
                 "prompt": prompt,
@@ -200,7 +227,8 @@ class NewsFetcher:
             # Validate required fields
             required_fields = ["title", "description"]
-            missing_fields = [field for field in required_fields if field not in summary_data]
+            missing_fields = [
+                field for field in required_fields if field not in summary_data]
 
             if missing_fields:
                 logger.warning(
@@ -229,7 +257,9 @@ class NewsFetcher:
             logger.error(f"❌ HTTP error for {url}: {e}")
             return None
         except Exception as e:
-            logger.error(f"❌ Unexpected error summarizing {url}: {type(e).__name__}: {e}")
+            logger.error(
+                f"❌ Unexpected error summarizing {url}: {
+                    type(e).__name__}: {e}")
             return None
 
     @staticmethod
@@ -264,7 +294,9 @@ class NewsFetcher:
             )
         except Exception as e:
-            logger.error(f"❌ Critical error during harvest: {type(e).__name__}: {e}")
+            logger.error(
+                f"❌ Critical error during harvest: {
+                    type(e).__name__}: {e}")
             raise
 
     @staticmethod
@@ -289,14 +321,20 @@ class NewsFetcher:
             feed_data = feedparser.parse(feed_row["url"])
 
             if hasattr(feed_data, 'bozo') and feed_data.bozo:
-                logger.warning(f"⚠️ Feed has parsing issues: {feed_row['url']}")
+                logger.warning(
+                    f"⚠️ Feed has parsing issues: {
+                        feed_row['url']}")
                 if hasattr(feed_data, 'bozo_exception'):
-                    logger.warning(f"⚠️ Feed exception: {feed_data.bozo_exception}")
+                    logger.warning(
+                        f"⚠️ Feed exception: {
+                            feed_data.bozo_exception}")
 
             total_entries = len(feed_data.entries)
             if total_entries == 0:
-                logger.warning(f"⚠️ No entries found in feed: {feed_row['url']}")
+                logger.warning(
+                    f"⚠️ No entries found in feed: {
+                        feed_row['url']}")
                 return stats
 
             for i, entry in enumerate(feed_data.entries, 1):
@@ -321,18 +359,23 @@ class NewsFetcher:
                     stats['skipped'] += 1
                     continue
 
-                # Check if article already exists - use readonly connection for better concurrency
+                # Check if article already exists - use readonly connection for
+                # better concurrency
                 try:
                     with db_manager.get_cursor_with_retry(readonly=True) as cursor:
-                        cursor.execute("SELECT id FROM news WHERE url = ?", (article_url,))
+                        cursor.execute(
+                            "SELECT id FROM news WHERE url = ?", (article_url,))
                         if cursor.fetchone():
                             stats['skipped'] += 1
                             continue
                 except Exception as db_error:
-                    logger.warning(f"⚠️ Database check failed for article {i}, continuing: {db_error}")
+                    logger.warning(
+                        f"⚠️ Database check failed for article {i}, continuing: {db_error}")
 
                 rss_title = getattr(entry, 'title', '')
-                rss_description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')
+                rss_description = getattr(
+                    entry, 'description', '') or getattr(
+                    entry, 'summary', '')
 
                 summary = await NewsFetcher.summarize_article(
                     client,
@@ -342,7 +385,8 @@ class NewsFetcher:
                 )
 
                 if not summary:
-                    logger.warning(f"❌ Failed to get summary for article {i}: {article_url}")
+                    logger.warning(
+                        f"❌ Failed to get summary for article {i}: {article_url}")
                     stats['failed'] += 1
                     continue
@@ -353,7 +397,7 @@ class NewsFetcher:
                         cursor.execute(
                             """
                             INSERT
                             OR IGNORE INTO news
                             (title, description, url, published, country)
                             VALUES (?, ?, ?, ?, ?)
                             """,
@@ -369,13 +413,17 @@ class NewsFetcher:
                         stats['successful'] += 1
                 except Exception as db_error:
-                    logger.error(f"❌ Database error for article {i}: {db_error}")
+                    logger.error(
+                        f"❌ Database error for article {i}: {db_error}")
                     stats['failed'] += 1
                     continue
 
                 await asyncio.sleep(0.01)  # 10ms delay to yield control
 
         except Exception as e:
-            logger.error(f"❌ Error processing feed {feed_row['url']}: {type(e).__name__}: {e}")
+            logger.error(
+                f"❌ Error processing feed {
+                    feed_row['url']}: {
+                    type(e).__name__}: {e}")
 
         return stats
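
Most of the services.py hunks re-wrap calls inside fetch_article_content and the harvest loop without changing behaviour. A condensed, synchronous sketch of the extraction step they touch (the timeout and length limit below are assumed values, not the project's configuration):

import re

import httpx
from bs4 import BeautifulSoup

ARTICLE_FETCH_TIMEOUT = 10   # assumption: configured in backend.app.config
MAX_ARTICLE_LENGTH = 3000    # assumption: configured in backend.app.config


def fetch_article_text(url: str) -> str:
    """Fetch a page and return cleaned, length-limited article text ('' on failure)."""
    try:
        response = httpx.get(url, timeout=ARTICLE_FETCH_TIMEOUT, follow_redirects=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop non-content elements, as in the commit's fetch_article_content.
        for element in soup(['script', 'style', 'nav', 'header',
                             'footer', 'aside', 'form', 'button']):
            element.decompose()
        text = soup.get_text(separator=' ', strip=True)
        text = re.sub(r'\s+', ' ', text).strip()  # normalize whitespace
        return text[:MAX_ARTICLE_LENGTH]          # limit length for the LLM prompt
    except httpx.HTTPError:
        return ""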