refactor: apply consistent formatting and improve code readability across backend modules
@@ -1,6 +1,6 @@
-from pathlib import Path
-import os
 import logging
+import os
+from pathlib import Path
 
 DB_PATH = Path(os.getenv("DB_NAME", "owlynews.sqlite3"))
 OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
@@ -42,7 +42,8 @@ def update_constants_from_db(settings_dict):
     if 'ollama_host' in settings_dict and os.getenv("OLLAMA_HOST") is None:
         OLLAMA_HOST = settings_dict['ollama_host']
 
-    if 'min_cron_hours' in settings_dict and os.getenv("MIN_CRON_HOURS") is None:
+    if 'min_cron_hours' in settings_dict and os.getenv(
+            "MIN_CRON_HOURS") is None:
         try:
             MIN_CRON_HOURS = float(settings_dict['min_cron_hours'])
         except (ValueError, TypeError):
@@ -61,7 +62,8 @@ def update_constants_from_db(settings_dict):
                 f"{settings_dict['cron_hours']}"
             )
 
-    if 'sync_cooldown_minutes' in settings_dict and os.getenv("SYNC_COOLDOWN_MINUTES") is None:
+    if 'sync_cooldown_minutes' in settings_dict and os.getenv(
+            "SYNC_COOLDOWN_MINUTES") is None:
         try:
             SYNC_COOLDOWN_MINUTES = int(settings_dict['sync_cooldown_minutes'])
         except (ValueError, TypeError):
@@ -73,7 +75,8 @@ def update_constants_from_db(settings_dict):
     if 'llm_model' in settings_dict and os.getenv("LLM_MODEL") is None:
         LLM_MODEL = settings_dict['llm_model']
 
-    if 'llm_timeout_seconds' in settings_dict and os.getenv("LLM_TIMEOUT_SECONDS") is None:
+    if 'llm_timeout_seconds' in settings_dict and os.getenv(
+            "LLM_TIMEOUT_SECONDS") is None:
         try:
             LLM_TIMEOUT_SECONDS = int(settings_dict['llm_timeout_seconds'])
         except (ValueError, TypeError):
@@ -82,16 +85,19 @@ def update_constants_from_db(settings_dict):
                 f"{settings_dict['llm_timeout_seconds']}"
             )
 
-    if 'ollama_api_timeout_seconds' in settings_dict and os.getenv("OLLAMA_API_TIMEOUT_SECONDS") is None:
+    if 'ollama_api_timeout_seconds' in settings_dict and os.getenv(
+            "OLLAMA_API_TIMEOUT_SECONDS") is None:
         try:
-            OLLAMA_API_TIMEOUT_SECONDS = int(settings_dict['ollama_api_timeout_seconds'])
+            OLLAMA_API_TIMEOUT_SECONDS = int(
+                settings_dict['ollama_api_timeout_seconds'])
         except (ValueError, TypeError):
             logger.warning(
                 f"⚠️ Invalid ollama_api_timeout_seconds value in DB: "
                 f"{settings_dict['ollama_api_timeout_seconds']}"
             )
 
-    if 'article_fetch_timeout' in settings_dict and os.getenv("ARTICLE_FETCH_TIMEOUT") is None:
+    if 'article_fetch_timeout' in settings_dict and os.getenv(
+            "ARTICLE_FETCH_TIMEOUT") is None:
         try:
             ARTICLE_FETCH_TIMEOUT = int(settings_dict['article_fetch_timeout'])
         except (ValueError, TypeError):
@@ -100,7 +106,8 @@ def update_constants_from_db(settings_dict):
                 f"{settings_dict['article_fetch_timeout']}"
             )
 
-    if 'max_article_length' in settings_dict and os.getenv("MAX_ARTICLE_LENGTH") is None:
+    if 'max_article_length' in settings_dict and os.getenv(
+            "MAX_ARTICLE_LENGTH") is None:
         try:
             MAX_ARTICLE_LENGTH = int(settings_dict['max_article_length'])
         except (ValueError, TypeError):
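The six blocks above repeat one pattern: use the value from settings_dict only when the matching environment variable is unset, coerce it, and log a warning when coercion fails. A minimal sketch of a helper that would collapse that repetition; the name _setting_override and its signature are assumptions, not part of this commit:

def _setting_override(settings_dict, key, env_var, cast, default):
    # Environment variables win over DB-stored settings, mirroring the checks above.
    if key not in settings_dict or os.getenv(env_var) is not None:
        return default
    try:
        return cast(settings_dict[key])
    except (ValueError, TypeError):
        logger.warning(f"⚠️ Invalid {key} value in DB: {settings_dict[key]}")
        return default

Hypothetical usage inside update_constants_from_db:

MIN_CRON_HOURS = _setting_override(settings_dict, 'min_cron_hours', 'MIN_CRON_HOURS', float, MIN_CRON_HOURS)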
@@ -1,11 +1,24 @@
+import json
+import sqlite3
+import time
 from contextlib import contextmanager
 from pathlib import Path
-import sqlite3
 from typing import Iterator
 
-from backend.app.config import logger, DB_PATH, update_constants_from_db, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
-    SYNC_COOLDOWN_MINUTES, LLM_MODEL, LLM_TIMEOUT_SECONDS, OLLAMA_API_TIMEOUT_SECONDS, ARTICLE_FETCH_TIMEOUT, \
-    MAX_ARTICLE_LENGTH
+from backend.app.config import (
+    ARTICLE_FETCH_TIMEOUT,
+    CRON_HOURS,
+    DB_PATH,
+    LLM_MODEL,
+    LLM_TIMEOUT_SECONDS,
+    MAX_ARTICLE_LENGTH,
+    MIN_CRON_HOURS,
+    OLLAMA_API_TIMEOUT_SECONDS,
+    OLLAMA_HOST,
+    SYNC_COOLDOWN_MINUTES,
+    logger,
+    update_constants_from_db,
+)
 
 
 class DatabaseManager:
@@ -41,7 +54,8 @@ class DatabaseManager:
             schema_sql = f.read()
 
         with self.get_cursor() as cursor:
-            statements = [stmt.strip() for stmt in schema_sql.split(';') if stmt.strip()]
+            statements = [stmt.strip()
+                          for stmt in schema_sql.split(';') if stmt.strip()]
             for statement in statements:
                 cursor.execute(statement)
 
@@ -85,14 +99,12 @@ class DatabaseManager:
         """
         Seed initial feeds from seed_feeds.json file.
         """
-        import json
-        from pathlib import Path
-
         try:
             seed_file = Path(__file__).parent / "seed_feeds.json"
 
             if not seed_file.exists():
-                logger.warning("⚠️ seed_feeds.json not found, skipping feed seeding")
+                logger.warning(
+                    "⚠️ seed_feeds.json not found, skipping feed seeding")
                 return
 
             with open(seed_file, 'r', encoding='utf-8') as f:
@@ -101,10 +113,7 @@ class DatabaseManager:
                 for country, urls in feeds_data.items():
                     for url in urls:
                         cursor.execute(
-                            "INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)",
-                            (country, url)
-                        )
-
+                            "INSERT OR IGNORE INTO feeds (country, url) VALUES (?, ?)", (country, url))
 
         except Exception as e:
             logger.error(f"❌ Failed to seed feeds: {e}")
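The seeding loop iterates feeds_data.items() as country → list of URLs, so seed_feeds.json is presumably a mapping of country code to feed URLs. A hypothetical example of the structure json.load would return (the codes and URLs are placeholders, not taken from the repository):

feeds_data = {
    "de": ["https://example.com/rss/de.xml"],
    "us": ["https://example.com/rss/us.xml", "https://example.org/feed"],
}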
@@ -182,8 +191,7 @@ class DatabaseManager:
                 conn.rollback()
             if "database is locked" in str(e).lower():
                 logger.warning(
-                    f"⚠️ Database temporarily locked, operation may need retry: {e}"
-                )
+                    f"⚠️ Database temporarily locked, operation may need retry: {e}")
             raise e
         except Exception as e:
             if conn:
@@ -194,7 +202,9 @@ class DatabaseManager:
                 conn.close()
 
     @contextmanager
-    def get_cursor_with_retry(self, readonly: bool = False, max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
+    def get_cursor_with_retry(self,
+                              readonly: bool = False,
+                              max_retries: int = 3) -> Iterator[sqlite3.Cursor]:
         """
         Context manager with retry logic for database operations.
 
@@ -211,13 +221,13 @@ class DatabaseManager:
                     yield cursor
                     return
             except sqlite3.OperationalError as e:
-                if "database is locked" in str(e).lower() and attempt < max_retries:
+                if "database is locked" in str(
+                        e).lower() and attempt < max_retries:
                     wait_time = (attempt + 1) * 0.1
                     logger.warning(
                         f"⚠️ Database locked, retrying in {wait_time}s "
                         f"(attempt {attempt + 1}/{max_retries + 1})"
                     )
-                    import time
                     time.sleep(wait_time)
                     continue
                 raise e
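get_cursor_with_retry wraps the cursor context manager in up to max_retries extra attempts with a linear backoff of (attempt + 1) * 0.1 seconds whenever SQLite reports "database is locked". A short usage sketch, consistent with the read path later in this commit; the query itself is illustrative only:

# Retry-aware read: with max_retries=3 the waits are 0.1s, 0.2s and 0.3s before giving up.
with db_manager.get_cursor_with_retry(readonly=True, max_retries=3) as cursor:
    cursor.execute("SELECT COUNT(*) FROM news")
    row_count = cursor.fetchone()[0]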
@@ -12,22 +12,37 @@ The application uses SQLite for data storage and APScheduler for scheduling peri
 import asyncio
 import os
 import sqlite3
+import time
 from datetime import datetime, timedelta, timezone
-from http.client import HTTPException
 from typing import Any, Dict, List, Union
 
 # Third-party imports
 import httpx
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
-from fastapi import Depends, FastAPI, Response, status
+from fastapi import Depends, FastAPI, HTTPException, Response, status
 from fastapi.staticfiles import StaticFiles
 
-from backend.app.config import logger, OLLAMA_HOST, CRON_HOURS, MIN_CRON_HOURS, \
-    SYNC_COOLDOWN_MINUTES, LLM_MODEL, OLLAMA_API_TIMEOUT_SECONDS, frontend_path
+from backend.app.config import (
+    CRON_HOURS,
+    LLM_MODEL,
+    MIN_CRON_HOURS,
+    OLLAMA_API_TIMEOUT_SECONDS,
+    OLLAMA_HOST,
+    SYNC_COOLDOWN_MINUTES,
+    frontend_path,
+    logger,
+)
 from backend.app.database import get_db, get_db_write
-from backend.app.models import TimestampResponse, SuccessResponse, FeedData, ModelStatus, ErrorResponse, HoursResponse, \
-    CronSettings
+from backend.app.models import (
+    CronSettings,
+    ErrorResponse,
+    FeedData,
+    HoursResponse,
+    ModelStatus,
+    SuccessResponse,
+    TimestampResponse,
+)
 from backend.app.services import NewsFetcher
 
 app = FastAPI(
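Beyond import sorting, this hunk replaces the HTTPException import from http.client with FastAPI's own HTTPException. http.client.HTTPException is not part of FastAPI's error handling, so raising it in a route surfaces as an unhandled error (a 500), while fastapi.HTTPException is converted into the intended status code and JSON body. A self-contained sketch of that behaviour, kept separate from the application's own app object and routes:

from fastapi import FastAPI, HTTPException

demo_app = FastAPI()

@demo_app.get("/demo")
async def demo(ok: bool = False):
    if not ok:
        # FastAPI turns this into a proper 400 response with a JSON detail field.
        raise HTTPException(status_code=400, detail="Invalid date format. Use ISO format (YYYY-MM-DD)")
    return {"status": "ok"}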
@@ -88,7 +103,8 @@ async def get_news(
         return [dict(row) for row in db.fetchall()]
 
     except ValueError:
-        raise HTTPException(400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
+        raise HTTPException(
+            400, "Invalid date format. Use ISO format (YYYY-MM-DD)")
     except Exception as e:
         logger.error(f"❌ Error fetching news: {e}")
         raise HTTPException(
@@ -244,8 +260,7 @@ async def manual_sync(db: sqlite3.Cursor = Depends(get_db)):
     if now - last_sync_time < timedelta(minutes=SYNC_COOLDOWN_MINUTES):
         return Response(
             status_code=status.HTTP_429_TOO_MANY_REQUESTS,
-            content="Sync was triggered too recently. Please wait before triggering again."
-        )
+            content="Sync was triggered too recently. Please wait before triggering again.")
 
     try:
         task = asyncio.create_task(NewsFetcher.harvest_feeds())
@@ -281,7 +296,9 @@ async def get_cron_schedule(db: sqlite3.Cursor = Depends(get_db)):
 
 
 @app.post("/settings/cron", response_model=HoursResponse)
-async def update_cron_schedule(data: CronSettings, db: sqlite3.Cursor = Depends(get_db_write)):
+async def update_cron_schedule(
+        data: CronSettings,
+        db: sqlite3.Cursor = Depends(get_db_write)):
     """
     Update the cron schedule for harvesting news.
 
@@ -1,4 +1,4 @@
-from typing import TypedDict, List
+from typing import List, TypedDict
 
 from pydantic import BaseModel
 
@@ -3,14 +3,20 @@ import json
 import re
 import sqlite3
 from datetime import datetime, timezone
-from typing import Optional, cast, Dict
+from typing import Dict, Optional, cast
 
 import feedparser
 import httpx
 from bs4 import BeautifulSoup
 
-from backend.app.config import ARTICLE_FETCH_TIMEOUT, MAX_ARTICLE_LENGTH, logger, LLM_MODEL, OLLAMA_HOST, \
-    LLM_TIMEOUT_SECONDS
+from backend.app.config import (
+    ARTICLE_FETCH_TIMEOUT,
+    LLM_MODEL,
+    LLM_TIMEOUT_SECONDS,
+    MAX_ARTICLE_LENGTH,
+    OLLAMA_HOST,
+    logger,
+)
 from backend.app.database import db_manager
 from backend.app.models import ArticleSummary
 
@@ -22,7 +28,9 @@ class NewsFetcher:
     """
 
     @staticmethod
-    async def fetch_article_content(client: httpx.AsyncClient, url: str) -> str:
+    async def fetch_article_content(
+            client: httpx.AsyncClient,
+            url: str) -> str:
         """
         Fetch and extract the main content from an article URL.
 
@@ -51,7 +59,14 @@ class NewsFetcher:
 
             soup = BeautifulSoup(response.text, 'html.parser')
 
-            for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'form', 'button']):
+            for element in soup(['script',
+                                 'style',
+                                 'nav',
+                                 'header',
+                                 'footer',
+                                 'aside',
+                                 'form',
+                                 'button']):
                 element.decompose()
 
             content_selectors = [
@@ -84,7 +99,8 @@ class NewsFetcher:
             if body:
                 article_text = body.get_text(separator=' ', strip=True)
 
-            article_text = re.sub(r'\s+', ' ', article_text) # Normalize whitespace
+            article_text = re.sub(
+                r'\s+', ' ', article_text) # Normalize whitespace
             article_text = article_text.strip()
 
             # Limit length to avoid overwhelming the LLM
@@ -97,14 +113,21 @@ class NewsFetcher:
             logger.warning(f"⏰ Timeout fetching article content from: {url}")
             return ""
         except httpx.HTTPError as e:
-            logger.warning(f"🌐 HTTP error fetching article content from {url}: {e}")
+            logger.warning(
+                f"🌐 HTTP error fetching article content from {url}: {e}")
             return ""
         except Exception as e:
-            logger.warning(f"❌ Error fetching article content from {url}: {type(e).__name__}: {e}")
+            logger.warning(
+                f"❌ Error fetching article content from {url}: {
+                    type(e).__name__}: {e}")
             return ""
 
     @staticmethod
-    def build_prompt(url: str, title: str = "", description: str = "", content: str = "") -> str:
+    def build_prompt(
+            url: str,
+            title: str = "",
+            description: str = "",
+            content: str = "") -> str:
         """
         Generate a prompt for the LLM to summarize an article.
 
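Several of the rewrapped log calls in this file (here and in the hunks below) break an f-string replacement field across lines, for example f"... {url}: {" followed by "type(e).__name__}: {e}") on the next line. That form is only valid on Python 3.12+ (PEP 701); older interpreters reject it as a SyntaxError. If earlier versions must be supported, an equivalent wrap keeps each replacement field on one line and relies on implicit string concatenation — a sketch, not what the commit does:

logger.warning(
    f"❌ Error fetching article content from {url}: "
    f"{type(e).__name__}: {e}")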
@@ -124,10 +147,12 @@ class NewsFetcher:
         if description:
             context_info.append(f"RSS-Beschreibung: {description}")
         if content:
-            content_preview = content[:500] + "..." if len(content) > 500 else content
+            content_preview = content[:500] + \
+                "..." if len(content) > 500 else content
             context_info.append(f"Artikel-Inhalt: {content_preview}")
 
-        context = "\n".join(context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
+        context = "\n".join(
+            context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
 
         return (
             "### Aufgabe\n"
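The backslash continuation of the conditional expression above is easy to misread; wrapping the whole expression in parentheses keeps the same behaviour (truncate to 500 characters and append an ellipsis) without a line continuation. An alternative formatting sketch, not what the commit applies:

content_preview = (
    content[:500] + "..." if len(content) > 500 else content
)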
@@ -171,9 +196,11 @@ class NewsFetcher:
             article_content = await NewsFetcher.fetch_article_content(client, url)
 
             if not article_content:
-                logger.warning(f"⚠️ Could not fetch article content, using RSS data only")
+                logger.warning(
+                    f"⚠️ Could not fetch article content, using RSS data only")
 
-            prompt = NewsFetcher.build_prompt(url, title, description, article_content)
+            prompt = NewsFetcher.build_prompt(
+                url, title, description, article_content)
             payload = {
                 "model": LLM_MODEL,
                 "prompt": prompt,
@@ -200,7 +227,8 @@ class NewsFetcher:
 
             # Validate required fields
             required_fields = ["title", "description"]
-            missing_fields = [field for field in required_fields if field not in summary_data]
+            missing_fields = [
+                field for field in required_fields if field not in summary_data]
 
             if missing_fields:
                 logger.warning(
@@ -229,7 +257,9 @@ class NewsFetcher:
             logger.error(f"❌ HTTP error for {url}: {e}")
             return None
         except Exception as e:
-            logger.error(f"❌ Unexpected error summarizing {url}: {type(e).__name__}: {e}")
+            logger.error(
+                f"❌ Unexpected error summarizing {url}: {
+                    type(e).__name__}: {e}")
             return None
 
     @staticmethod
@@ -264,7 +294,9 @@ class NewsFetcher:
             )
 
         except Exception as e:
-            logger.error(f"❌ Critical error during harvest: {type(e).__name__}: {e}")
+            logger.error(
+                f"❌ Critical error during harvest: {
+                    type(e).__name__}: {e}")
             raise
 
     @staticmethod
@@ -289,14 +321,20 @@ class NewsFetcher:
                 feed_data = feedparser.parse(feed_row["url"])
 
                 if hasattr(feed_data, 'bozo') and feed_data.bozo:
-                    logger.warning(f"⚠️ Feed has parsing issues: {feed_row['url']}")
+                    logger.warning(
+                        f"⚠️ Feed has parsing issues: {
+                            feed_row['url']}")
                     if hasattr(feed_data, 'bozo_exception'):
-                        logger.warning(f"⚠️ Feed exception: {feed_data.bozo_exception}")
+                        logger.warning(
+                            f"⚠️ Feed exception: {
+                                feed_data.bozo_exception}")
 
                 total_entries = len(feed_data.entries)
 
                 if total_entries == 0:
-                    logger.warning(f"⚠️ No entries found in feed: {feed_row['url']}")
+                    logger.warning(
+                        f"⚠️ No entries found in feed: {
+                            feed_row['url']}")
                     return stats
 
                 for i, entry in enumerate(feed_data.entries, 1):
@@ -321,18 +359,23 @@ class NewsFetcher:
                         stats['skipped'] += 1
                         continue
 
-                    # Check if article already exists - use readonly connection for better concurrency
+                    # Check if article already exists - use readonly connection for
+                    # better concurrency
                     try:
                         with db_manager.get_cursor_with_retry(readonly=True) as cursor:
-                            cursor.execute("SELECT id FROM news WHERE url = ?", (article_url,))
+                            cursor.execute(
+                                "SELECT id FROM news WHERE url = ?", (article_url,))
                             if cursor.fetchone():
                                 stats['skipped'] += 1
                                 continue
                     except Exception as db_error:
-                        logger.warning(f"⚠️ Database check failed for article {i}, continuing: {db_error}")
+                        logger.warning(
+                            f"⚠️ Database check failed for article {i}, continuing: {db_error}")
 
                     rss_title = getattr(entry, 'title', '')
-                    rss_description = getattr(entry, 'description', '') or getattr(entry, 'summary', '')
+                    rss_description = getattr(
+                        entry, 'description', '') or getattr(
+                        entry, 'summary', '')
 
                     summary = await NewsFetcher.summarize_article(
                         client,
@@ -342,7 +385,8 @@ class NewsFetcher:
                     )
 
                     if not summary:
-                        logger.warning(f"❌ Failed to get summary for article {i}: {article_url}")
+                        logger.warning(
+                            f"❌ Failed to get summary for article {i}: {article_url}")
                         stats['failed'] += 1
                         continue
 
@@ -353,7 +397,7 @@ class NewsFetcher:
                         cursor.execute(
                             """
                             INSERT
                             OR IGNORE INTO news
                             (title, description, url, published, country)
                             VALUES (?, ?, ?, ?, ?)
                             """,
@@ -369,13 +413,17 @@ class NewsFetcher:
                         stats['successful'] += 1
 
                     except Exception as db_error:
-                        logger.error(f"❌ Database error for article {i}: {db_error}")
+                        logger.error(
+                            f"❌ Database error for article {i}: {db_error}")
                         stats['failed'] += 1
                         continue
 
                     await asyncio.sleep(0.01) # 10ms delay to yield control
 
             except Exception as e:
-                logger.error(f"❌ Error processing feed {feed_row['url']}: {type(e).__name__}: {e}")
+                logger.error(
+                    f"❌ Error processing feed {
+                        feed_row['url']}: {
+                        type(e).__name__}: {e}")
 
         return stats