From e1f51794afc2d76b62cc54d36f9b2b7fe21b32f2 Mon Sep 17 00:00:00 2001
From: vikingowl
Date: Fri, 1 Aug 2025 22:51:38 +0200
Subject: [PATCH] refactor: apply consistent formatting and improve code readability across backend modules

---
 backend/app/config.py   |  25 ++++++----
 backend/app/database.py |  46 +++++++++++-------
 backend/app/main.py     |  37 +++++++++++----
 backend/app/models.py   |   2 +-
 backend/app/services.py | 102 +++++++++++++++++++++++++++++-----------
 5 files changed, 147 insertions(+), 65 deletions(-)

diff --git a/backend/app/config.py b/backend/app/config.py
index 4863f1b..2189c21 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -1,6 +1,6 @@
-from pathlib import Path
-import os
 import logging
+import os
+from pathlib import Path
 
 DB_PATH = Path(os.getenv("DB_NAME", "owlynews.sqlite3"))
 OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
@@ -42,7 +42,8 @@ def update_constants_from_db(settings_dict):
     if 'ollama_host' in settings_dict and os.getenv("OLLAMA_HOST") is None:
         OLLAMA_HOST = settings_dict['ollama_host']
 
-    if 'min_cron_hours' in settings_dict and os.getenv("MIN_CRON_HOURS") is None:
+    if 'min_cron_hours' in settings_dict and os.getenv(
+            "MIN_CRON_HOURS") is None:
         try:
             MIN_CRON_HOURS = float(settings_dict['min_cron_hours'])
         except (ValueError, TypeError):
@@ -61,7 +62,8 @@ def update_constants_from_db(settings_dict):
             f"{settings_dict['cron_hours']}"
         )
 
-    if 'sync_cooldown_minutes' in settings_dict and os.getenv("SYNC_COOLDOWN_MINUTES") is None:
+    if 'sync_cooldown_minutes' in settings_dict and os.getenv(
+            "SYNC_COOLDOWN_MINUTES") is None:
         try:
             SYNC_COOLDOWN_MINUTES = int(settings_dict['sync_cooldown_minutes'])
         except (ValueError, TypeError):
@@ -73,7 +75,8 @@ def update_constants_from_db(settings_dict):
     if 'llm_model' in settings_dict and os.getenv("LLM_MODEL") is None:
         LLM_MODEL = settings_dict['llm_model']
 
-    if 'llm_timeout_seconds' in settings_dict and os.getenv("LLM_TIMEOUT_SECONDS") is None:
+    if 'llm_timeout_seconds' in settings_dict and os.getenv(
+            "LLM_TIMEOUT_SECONDS") is None:
         try:
             LLM_TIMEOUT_SECONDS = int(settings_dict['llm_timeout_seconds'])
         except (ValueError, TypeError):
@@ -82,16 +85,19 @@ def update_constants_from_db(settings_dict):
             f"{settings_dict['llm_timeout_seconds']}"
         )
 
-    if 'ollama_api_timeout_seconds' in settings_dict and os.getenv("OLLAMA_API_TIMEOUT_SECONDS") is None:
+    if 'ollama_api_timeout_seconds' in settings_dict and os.getenv(
+            "OLLAMA_API_TIMEOUT_SECONDS") is None:
         try:
-            OLLAMA_API_TIMEOUT_SECONDS = int(settings_dict['ollama_api_timeout_seconds'])
+            OLLAMA_API_TIMEOUT_SECONDS = int(
+                settings_dict['ollama_api_timeout_seconds'])
         except (ValueError, TypeError):
             logger.warning(
                 f"⚠️ Invalid ollama_api_timeout_seconds value in DB: "
                 f"{settings_dict['ollama_api_timeout_seconds']}"
             )
 
-    if 'article_fetch_timeout' in settings_dict and os.getenv("ARTICLE_FETCH_TIMEOUT") is None:
+    if 'article_fetch_timeout' in settings_dict and os.getenv(
+            "ARTICLE_FETCH_TIMEOUT") is None:
         try:
             ARTICLE_FETCH_TIMEOUT = int(settings_dict['article_fetch_timeout'])
         except (ValueError, TypeError):
@@ -100,7 +106,8 @@ def update_constants_from_db(settings_dict):
             f"{settings_dict['article_fetch_timeout']}"
         )
 
-    if 'max_article_length' in settings_dict and os.getenv("MAX_ARTICLE_LENGTH") is None:
+    if 'max_article_length' in settings_dict and os.getenv(
+            "MAX_ARTICLE_LENGTH") is None:
         try:
             MAX_ARTICLE_LENGTH = int(settings_dict['max_article_length'])
         except (ValueError, TypeError):
diff --git a/backend/app/database.py b/backend/app/database.py
index fd503b2..67d2059 100644
--- a/backend/app/database.py
+++ b/backend/app/database.py
@@ -1,11 +1,24 @@
+import json
+import sqlite3
+import time
 from contextlib import contextmanager
 from pathlib import Path
-import sqlite3
 from typing import Iterator
 
-from backend.app.config import logger, DB_PATH, update_constants_from_db, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
-    SYNC_COOLDOWN_MINUTES, LLM_MODEL, LLM_TIMEOUT_SECONDS, OLLAMA_API_TIMEOUT_SECONDS, ARTICLE_FETCH_TIMEOUT, \
-    MAX_ARTICLE_LENGTH
+from backend.app.config import (
+    ARTICLE_FETCH_TIMEOUT,
+    CRON_HOURS,
+    DB_PATH,
+    LLM_MODEL,
+    LLM_TIMEOUT_SECONDS,
+    MAX_ARTICLE_LENGTH,
+    MIN_CRON_HOURS,
+    OLLAMA_API_TIMEOUT_SECONDS,
+    OLLAMA_HOST,
+    SYNC_COOLDOWN_MINUTES,
+    logger,
+    update_constants_from_db,
+)
 
 
 class DatabaseManager:
@@ -41,7 +54,8 @@ class DatabaseManager:
             schema_sql = f.read()
 
         with self.get_cursor() as cursor:
-            statements = [stmt.strip() for stmt in schema_sql.split(';') if stmt.strip()]
+            statements = [stmt.strip()
+                          for stmt in schema_sql.split(';') if stmt.strip()]
 
             for statement in statements:
                 cursor.execute(statement)
@@ -85,14 +99,12 @@ class DatabaseManager:
         """
         Seed initial feeds from seed_feeds.json file.
         """
-        import json
-        from pathlib import Path
-
        try:
             seed_file = Path(__file__).parent / "seed_feeds.json"
 
             if not seed_file.exists():
-                logger.warning("⚠️ seed_feeds.json not found, skipping feed seeding")
+                logger.warning(
+                    "⚠️ seed_feeds.json not found, skipping feed seeding")
                 return
 
             with open(seed_file, 'r', encoding='utf-8') as f:
@@ -101,10 +113,7 @@ class DatabaseManager:
                 for country, urls in feeds_data.items():
                     for url in urls:
                         cursor.execute(
-                            "INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)",
-                            (country, url)
-                        )
-
+                            "INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)", (country, url))
         except Exception as e:
             logger.error(f"❌ Failed to seed feeds: {e}")
 
@@ -182,8 +191,7 @@ class DatabaseManager:
                     conn.rollback()
                 if "database is locked" in str(e).lower():
                     logger.warning(
-                        f"⚠️ Database temporarily locked, operation may need retry: {e}"
-                    )
+                        f"⚠️ Database temporarily locked, operation may need retry: {e}")
                 raise e
             except Exception as e:
                 if conn:
@@ -194,7 +202,9 @@ class DatabaseManager:
                 conn.close()
 
     @contextmanager
-    def get_cursor_with_retry(self, readonly: bool = False, max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
+    def get_cursor_with_retry(self,
+                              readonly: bool = False,
+                              max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
         """
         Context manager with retry logic for database operations.
 
@@ -211,13 +221,13 @@ class DatabaseManager:
                     yield cursor
                     return
             except sqlite3.OperationalError as e:
-                if "database is locked" in str(e).lower() and attempt < max_retries:
+                if "database is locked" in str(
+                        e).lower() and attempt < max_retries:
                     wait_time = (attempt + 1) * 0.1
                     logger.warning(
                         f"⚠️ Database locked, retrying in {wait_time}s "
                         f"(attempt {attempt + 1}/{max_retries + 1})"
                     )
-                    import time
                     time.sleep(wait_time)
                     continue
                 raise e
diff --git a/backend/app/main.py b/backend/app/main.py
index c0a5c71..14d27b8 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -12,22 +12,37 @@ The application uses SQLite for data storage and APScheduler for scheduling peri
 import asyncio
 import os
 import sqlite3
+import time
 from datetime import datetime, timedelta, timezone
-from http.client import HTTPException
 from typing import Any, Dict, List, Union
 
 # Third-party imports
 import httpx
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
-from fastapi import Depends, FastAPI, Response, status
+from fastapi import Depends, FastAPI, HTTPException, Response, status
 from fastapi.staticfiles import StaticFiles
 
-from backend.app.config import logger, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
-    SYNC_COOLDOWN_MINUTES, LLM_MODEL, OLLAMA_API_TIMEOUT_SECONDS, frontend_path
+from backend.app.config import (
+    CRON_HOURS,
+    LLM_MODEL,
+    MIN_CRON_HOURS,
+    OLLAMA_API_TIMEOUT_SECONDS,
+    OLLAMA_HOST,
+    SYNC_COOLDOWN_MINUTES,
+    frontend_path,
+    logger,
+)
 from backend.app.database import get_db, get_db_write
-from backend.app.models import TimestampResponse, SuccessResponse, FeedData, ModelStatus, ErrorResponse, HoursResponse, \
-    CronSettings
+from backend.app.models import (
+    CronSettings,
+    ErrorResponse,
+    FeedData,
+    HoursResponse,
+    ModelStatus,
+    SuccessResponse,
+    TimestampResponse,
+)
 from backend.app.services import NewsFetcher
 
 app = FastAPI(
@@ -88,7 +103,8 @@ async def get_news(
 
         return [dict(row) for row in db.fetchall()]
     except ValueError:
-        raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
+        raise HTTPException(
+            400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
     except Exception as e:
         logger.error(f"❌ Error fetching news: {e}")
         raise HTTPException(
@@ -244,8 +260,7 @@ async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
     if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
         return Response(
             status_code=status.HTTP_429_TOO_MANY_REQUESTS,
-            content="Sync was triggered too recently. Please wait before triggering again."
-        )
+            content="Sync was triggered too recently. Please wait before triggering again.")
 
     try:
         task = asyncio.create_task(NewsFetcher.harvest_feeds())
@@ -281,7 +296,9 @@ async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
 
 
 @app.post("/settings/cron", response_model=HoursResponse)
-async def update_cron_schedule(data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)):
+async def update_cron_schedule(
+        data: CronSettings,
+        db: sqlite3.Cursor = Depends(get_db_write)):
     """
     Update the cron schedule for harvesting news.
 
diff --git a/backend/app/models.py b/backend/app/models.py
index 984bc2f..d514e89 100644
--- a/backend/app/models.py
+++ b/backend/app/models.py
@@ -1,4 +1,4 @@
-from typing import TypedDict, List
+from typing import List, TypedDict
 
 from pydantic import BaseModel
 
diff --git a/backend/app/services.py b/backend/app/services.py
index ff92f82..f682dd8 100644
--- a/backend/app/services.py
+++ b/backend/app/services.py
@@ -3,14 +3,20 @@ import json
 import re
 import sqlite3
 from datetime import datetime, timezone
-from typing import Optional, cast, Dict
+from typing import Dict, Optional, cast
 
 import feedparser
 import httpx
 from bs4 import BeautifulSoup
 
-from backend.app.config import ARTICLE_FETCH_TIMEOUT, MAX_ARTICLE_LENGTH, logger, LLM_MODEL, OLLAMA_HOST, \
-    LLM_TIMEOUT_SECONDS
+from backend.app.config import (
+    ARTICLE_FETCH_TIMEOUT,
+    LLM_MODEL,
+    LLM_TIMEOUT_SECONDS,
+    MAX_ARTICLE_LENGTH,
+    OLLAMA_HOST,
+    logger,
+)
 from backend.app.database import db_manager
 from backend.app.models import ArticleSummary
 
@@ -22,7 +28,9 @@ class NewsFetcher:
     """
 
     @staticmethod
-    async def fetch_article_content(client: httpx.AsyncClient, url: str) -> str:
+    async def fetch_article_content(
+            client: httpx.AsyncClient,
+            url: str) -> str:
         """
         Fetch and extract the main content from an article URL.
 
@@ -51,7 +59,14 @@ class NewsFetcher:
 
             soup = BeautifulSoup(response.text, 'html.parser')
 
-            for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'form', 'button']):
+            for element in soup(['script',
+                                 'style',
+                                 'nav',
+                                 'header',
+                                 'footer',
+                                 'aside',
+                                 'form',
+                                 'button']):
                 element.decompose()
 
             content_selectors = [
@@ -84,7 +99,8 @@ class NewsFetcher:
             if body:
                 article_text = body.get_text(separator=' ', strip=True)
 
-            article_text = re.sub(r'\s+', ' ', article_text)  # Normalize whitespace
+            article_text = re.sub(
+                r'\s+', ' ', article_text)  # Normalize whitespace
             article_text = article_text.strip()
 
             # Limit length to avoid overwhelming the LLM
@@ -97,14 +113,21 @@ class NewsFetcher:
             logger.warning(f"⏰ Timeout fetching article content from: {url}")
             return ""
         except httpx.HTTPError as e:
-            logger.warning(f"🌐 HTTP error fetching article content from {url}: {e}")
+            logger.warning(
+                f"🌐 HTTP error fetching article content from {url}: {e}")
             return ""
         except Exception as e:
-            logger.warning(f"❌ Error fetching article content from {url}: {type(e).__name__}: {e}")
+            logger.warning(
+                f"❌ Error fetching article content from {url}: {
+                    type(e).__name__}: {e}")
             return ""
 
     @staticmethod
-    def build_prompt(url: str, title: str = "", description: str = "", content: str = "") -> str:
+    def build_prompt(
+            url: str,
+            title: str = "",
+            description: str = "",
+            content: str = "") -> str:
         """
         Generate a prompt for the LLM to summarize an article.
 
@@ -124,10 +147,12 @@ class NewsFetcher:
         if description:
             context_info.append(f"RSS-Beschreibung: {description}")
         if content:
-            content_preview = content[:500] + "..." if len(content) > 500 else content
+            content_preview = content[:500] + \
+                "..." if len(content) > 500 else content
             context_info.append(f"Artikel-Inhalt: {content_preview}")
 
-        context = "\n".join(context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
+        context = "\n".join(
+            context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
 
         return (
             "### Aufgabe\n"
@@ -171,9 +196,11 @@ class NewsFetcher:
             article_content = await NewsFetcher.fetch_article_content(client, url)
 
             if not article_content:
-                logger.warning(f"⚠️ Could not fetch article content, using RSS data only")
+                logger.warning(
+                    f"⚠️ Could not fetch article content, using RSS data only")
 
-            prompt = NewsFetcher.build_prompt(url, title, description, article_content)
+            prompt = NewsFetcher.build_prompt(
+                url, title, description, article_content)
             payload = {
                 "model": LLM_MODEL,
                 "prompt": prompt,
@@ -200,7 +227,8 @@ class NewsFetcher:
 
                     # Validate required fields
                     required_fields = ["title", "description"]
-                    missing_fields = [field for field in required_fields if field not in summary_data]
+                    missing_fields = [
+                        field for field in required_fields if field not in summary_data]
 
                     if missing_fields:
                         logger.warning(
@@ -229,7 +257,9 @@ class NewsFetcher:
             logger.error(f"❌ HTTP error for {url}: {e}")
             return None
         except Exception as e:
-            logger.error(f"❌ Unexpected error summarizing {url}: {type(e).__name__}: {e}")
+            logger.error(
+                f"❌ Unexpected error summarizing {url}: {
+                    type(e).__name__}: {e}")
             return None
 
     @staticmethod
@@ -264,7 +294,9 @@ class NewsFetcher:
             )
 
         except Exception as e:
-            logger.error(f"❌ Critical error during harvest: {type(e).__name__}: {e}")
+            logger.error(
+                f"❌ Critical error during harvest: {
+                    type(e).__name__}: {e}")
             raise
 
     @staticmethod
@@ -289,14 +321,20 @@ class NewsFetcher:
             feed_data = feedparser.parse(feed_row["url"])
 
             if hasattr(feed_data, 'bozo') and feed_data.bozo:
-                logger.warning(f"⚠️ Feed has parsing issues: {feed_row['url']}")
+                logger.warning(
+                    f"⚠️ Feed has parsing issues: {
+                        feed_row['url']}")
                 if hasattr(feed_data, 'bozo_exception'):
-                    logger.warning(f"⚠️ Feed exception: {feed_data.bozo_exception}")
+                    logger.warning(
+                        f"⚠️ Feed exception: {
+                            feed_data.bozo_exception}")
 
             total_entries = len(feed_data.entries)
 
             if total_entries == 0:
-                logger.warning(f"⚠️ No entries found in feed: {feed_row['url']}")
+                logger.warning(
+                    f"⚠️ No entries found in feed: {
+                        feed_row['url']}")
                 return stats
 
             for i, entry in enumerate(feed_data.entries, 1):
@@ -321,18 +359,23 @@ class NewsFetcher:
                     stats['skipped'] += 1
                     continue
 
-                # Check if article already exists - use readonly connection for better concurrency
+                # Check if article already exists - use readonly connection for
+                # better concurrency
                 try:
                     with db_manager.get_cursor_with_retry(readonly=True) as cursor:
-                        cursor.execute("SELECT id FROM news WHERE url = ?", (article_url,))
+                        cursor.execute(
+                            "SELECT id FROM news WHERE url = ?", (article_url,))
                         if cursor.fetchone():
                             stats['skipped'] += 1
                             continue
                 except Exception as db_error:
-                    logger.warning(f"⚠️ Database check failed for article {i}, continuing: {db_error}")
+                    logger.warning(
+                        f"⚠️ Database check failed for article {i}, continuing: {db_error}")
 
                 rss_title = getattr(entry, 'title', '')
-                rss_description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')
+                rss_description = getattr(
+                    entry, 'description', '') or getattr(
+                    entry, 'summary', '')
 
                 summary = await NewsFetcher.summarize_article(
                     client,
@@ -342,7 +385,8 @@ class NewsFetcher:
                 )
 
                 if not summary:
-                    logger.warning(f"❌ Failed to get summary for article {i}: {article_url}")
+                    logger.warning(
+                        f"❌ Failed to get summary for article {i}: {article_url}")
                     stats['failed'] += 1
                     continue
 
@@ -353,7 +397,7 @@ class NewsFetcher:
                         cursor.execute(
                             """
                             INSERT
-                            OR IGNORE INTO news
+                            OR IGNORE INTO news
                             (title, description, url, published, country)
                             VALUES (?, ?, ?, ?, ?)
""", @@ -369,13 +413,17 @@ class NewsFetcher: stats['successful'] += 1 except Exception as db_error: - logger.error(f"❌ Database error for article {i}: {db_error}") + logger.error( + f"❌ Database error for article {i}: {db_error}") stats['failed'] += 1 continue await asyncio.sleep(0.01) # 10ms delay to yield control except Exception as e: - logger.error(f"❌ Error processing feed {feed_row['url']}: {type(e).__name__}: {e}") + logger.error( + f"❌ Error processing feed { + feed_row['url']}: { + type(e).__name__}: {e}") return stats