Compare commits

37 Commits

SHA1 Message Date
57a7b42b9d [refactor] removed analytics, config, and modular crates to simplify the codebase and streamline architecture 2025-08-20 16:46:27 +02:00
d37daf02f6 [feat] introduced server and cli crates with foundational HTTP server and CLI implementation, including routing, health check, and configuration setup 2025-08-20 09:58:21 +02:00
16167d18ff [feat] modularized backend with plugin architecture, added module-api, module-host, and summarizer crates, and integrated dynamic module loading into main.rs 2025-08-20 08:51:38 +02:00
7c6724800f [refactor] removed unused api.rs module to streamline code structure 2025-08-20 08:28:36 +02:00
af304266a4 [refactor] removed unused EnvFilter import to clean up dependencies in main.rs 2025-08-07 22:51:33 +02:00
815e3b22fd [refactor] expanded ConfigFile with additional modular structures and default implementations for improved configuration handling and extensibility 2025-08-07 22:49:55 +02:00
e8e61faf61 [style] adjusted logging level to DEBUG for more detailed log output 2025-08-07 22:49:22 +02:00
c19813cbe2 [refactor] replaced info logging with debug for improved logging granularity and removed redundant log statements in backend services 2025-08-07 22:48:35 +02:00
cf163082b2 [update] improved summary generation prompts with stricter location formatting, updated JSON schema to support location field, and enhanced logging for AI operations in backend services 2025-08-07 18:09:05 +02:00
011b256662 [update] added gemma2:9b model to example.env, refactored prompt generation with stricter JSON rules, adjusted context size, integrated system prompt for better summaries, and improved error handling in backend services 2025-08-07 15:05:20 +02:00
0a97a57c76 [update] added validation for article data in NewsList.vue, removed unused Cypress config, expanded LLM models in example.env, adjusted context size and max article length in backend configuration, and updated workspace naming in yarn.lock 2025-08-07 13:15:40 +02:00
338b3ac7c1 [refactor] abstracted logging initialization and app settings loading into dedicated functions to streamline main.rs and improve code readability 2025-08-06 18:27:39 +02:00
13fbac5009 Merge remote-tracking branch 'origin/main' into rewrite-rust-backend 2025-08-06 18:14:09 +02:00
9b805e891a [update] enhanced tracing-subscriber setup with compact JSON logging and environment-based filter for streamlined production logging 2025-08-06 17:43:59 +02:00
78073d27d7 [update] refactored configuration handling with comprehensive ConfigFile structure, added default settings, expanded support for new modules, and enhanced directory creation logic 2025-08-06 16:54:10 +02:00
c3b0c87bfa [update] added migration scripts for migrating news data to articles, geographic and category tagging, and default sharing templates 2025-08-06 16:39:49 +02:00
0aa8d9fa3a [update] added /config.toml to .gitignore to exclude local configuration files from version control 2025-08-06 16:39:33 +02:00
cbbd0948e6 [update] added validation for AppSettings with ConfigError, ensuring non-zero port enforcement in config.rs 2025-08-06 14:07:55 +02:00
3a5b0d8f4b [update] added JSON logging support with tracing-subscriber for production, updated dependencies in Cargo.toml and Cargo.lock 2025-08-06 14:02:33 +02:00
0ce916c654 [update] improved error handling in handlers.rs by differentiating database errors, providing clearer response messages, and adding error logging with tracing 2025-08-06 14:00:58 +02:00
f853213d15 [update] enhanced database connection pooling with configurable options (max_connections, min_connections, timeouts) and added detailed error context for migration failures 2025-08-06 13:31:22 +02:00
300845c655 [update] improved configuration path handling in config.rs, fixed path-to-string conversion errors, and adjusted paths for renamed project and user-specific directories 2025-08-06 13:27:54 +02:00
d90c618ee3 [update] added development dependencies (tokio-test, axum-test) in Cargo.toml and updated Cargo.lock with required packages for testing enhancements 2025-08-06 13:27:46 +02:00
e7a97206a9 [update] added new services (scraping, tagging, analytics, sharing) and models (tag, analytics, settings), modularized into Rust backend structure 2025-08-06 13:20:04 +02:00
c2adfa711d [update] renamed project from owly-news-summariser to owly-news across all files for consistency and simplified branding 2025-08-06 12:45:47 +02:00
b2d82892ef [update] enhanced ROADMAP.md with advanced geographic tagging, hierarchical filtering, migration strategies, and expanded analytics features, clarified project phase details, and incorporated tagging upgrades into future plans 2025-08-06 12:38:21 +02:00
0f1632ad65 [update] overhauled README.md to reflect new Rust backend, modernized tooling instructions, updated frontend and backend setup guides, and improved clarity on application structure 2025-08-06 12:28:08 +02:00
7b114a6145 [cleanup] removed redundant trailing line in ROADMAP.md 2025-08-06 12:24:55 +02:00
4edb2b2179 [update] expanded ROADMAP.md to include detailed feature descriptions, enhanced database schema overview, and additional planned system modules 2025-08-06 12:21:57 +02:00
aa520efb82 [update] updated ROADMAP.md with new project architecture details, enhanced phase descriptions, and added configuration/system design elements 2025-08-06 11:46:43 +02:00
f22259b863 added config.toml support for flexible server configuration, integrated toml crate for parsing, refactored configuration management to use AppSettings, and updated database initialization logic 2025-08-05 11:24:59 +02:00
a30f8467bc implemented foundational API routes (/articles, /summaries) using Axum, added graceful shutdown handling, improved database initialization with connection pooling and directory creation, and integrated tracing for logging 2025-08-05 08:59:01 +02:00
79e4d7f1de refactored backend initialization logic: added centralized configuration management, improved database setup with connection pooling, and modularized core components (config, models, services) 2025-08-05 08:02:29 +02:00
37ebf45d82 migrated backend structure to Axum API setup, added core modules (models, services, api), integrated dotenv for configuration management, and added project roadmap for future phases 2025-08-05 08:02:07 +02:00
bc1735448a removed custom migration logic, integrated sqlx native migrations, and reorganized migration files for clarity and maintainability 2025-08-05 05:00:38 +02:00
59b19a22ff migrated migration logic from rusqlite to sqlx and updated relevant async methods for better database interaction 2025-08-05 04:18:42 +02:00
86b5f83140 added database migration and initialization logic to backend, including migration loader and async migration runner 2025-08-05 03:16:36 +02:00
57 changed files with 5421 additions and 248 deletions

3
.gitignore vendored

@@ -34,10 +34,11 @@ build/
logs/
*.log
# Database files
# Database files (now includes the specific dev database)
*.sqlite
*.sqlite3
*.db
owlynews.sqlite3*
# Dependency directories
node_modules/

238
README.md

@@ -1,105 +1,67 @@
# Owly News Summariser
# Owly News
Owly News Summariser is a web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.
Owly News is a modern web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.
## Features
- Fetches news from configurable RSS feeds
- Automatically summarizes articles using Ollama LLM
- Filters news by country
- **AI-powered intelligent tagging** with geographic, category, and source tags
- **Advanced multi-criteria filtering** with hierarchical tag support
- Progressive Web App (PWA) support for offline access
- Scheduled background updates
- High-performance Rust backend for optimal resource usage
- Modern Vue.js frontend with TypeScript support
- **Comprehensive analytics** and reading statistics
- **Flexible sharing system** with multiple format options
## Project Structure
The project consists of two main components:
The project consists of multiple components:
- **Backend**: A FastAPI application that fetches and processes news feeds, summarizes articles, and provides API endpoints
- **Frontend**: A Vue.js application that displays the news and provides a user interface for managing feeds
- **Backend (Rust)**: Primary backend written in Rust using Axum framework for high performance (`backend-rust/`)
- **Backend (Python)**: Legacy FastAPI backend (`backend/`)
- **Frontend**: Modern Vue.js 3 application with TypeScript and Tailwind CSS (`frontend/`)
## Prerequisites
- Python 3.8+ for the backend
- Node.js 16+ and Yarn for the frontend
### For Rust Backend (Recommended)
- Rust 1.88.0+
- [Ollama](https://ollama.ai/) for article summarization and tagging
- SQLite (handled automatically by SQLx)
### For Python Backend (Legacy)
- Python 3.8+
- [Ollama](https://ollama.ai/) for article summarization
## Installing Yarn
Yarn is a package manager for JavaScript that's required for the frontend. Here's how to install it:
### Using npm (recommended)
If you already have Node.js installed, the easiest way to install Yarn is via npm:
```bash
npm install -g yarn
```
### Platform-specific installations
#### Windows
- **Using Chocolatey**: `choco install yarn`
- **Using Scoop**: `scoop install yarn`
- **Manual installation**: Download and run the [installer](https://classic.yarnpkg.com/latest.msi)
#### macOS
- **Using Homebrew**: `brew install yarn`
- **Using MacPorts**: `sudo port install yarn`
#### Linux
- **Debian/Ubuntu**:
```bash
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
sudo apt update && sudo apt install yarn
```
- **CentOS/Fedora/RHEL**:
```bash
curl --silent --location https://dl.yarnpkg.com/rpm/yarn.repo | sudo tee /etc/yum.repos.d/yarn.repo
sudo yum install yarn
```
- **Arch Linux**: `pacman -S yarn`
After installation, verify Yarn is installed correctly:
```bash
yarn --version
```
### For Frontend
- Node.js 22+ and npm
- Modern web browser with PWA support
## Setup
### Backend Setup
### Rust Backend Setup (Recommended)
1. Navigate to the backend directory:
1. Navigate to the Rust backend directory:
```bash
cd backend
cd backend-rust
```
2. Create a virtual environment:
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. Install dependencies:
```bash
pip install -r requirements.txt
```
4. Create a `.env` file based on the example:
2. Create a `.env` file based on the example:
```bash
cp example.env .env
```
5. Customize the `.env` file as needed:
- `OLLAMA_HOST`: URL for the Ollama service (default: http://localhost:11434)
- `CRON_HOURS`: Interval for scheduled news fetching (default: 1)
3. Customize the `.env` file as needed:
- `DATABASE_URL`: SQLite database connection string
- `OLLAMA_BASE_URL`: URL for the Ollama service (default: http://localhost:11434)
- Other configuration options as documented in the example file
4. Run database migrations:
```bash
cargo install sqlx-cli
sqlx migrate run
```
### Frontend Setup
@@ -110,29 +72,24 @@ yarn --version
2. Install dependencies:
```bash
yarn
npm install
```
## Running the Application
### Running the Backend
### Running the Rust Backend
1. Navigate to the backend directory:
1. Navigate to the Rust backend directory:
```bash
cd backend
cd backend-rust
```
2. Activate the virtual environment:
2. Start the backend server:
```bash
source venv/bin/activate # On Windows: venv\Scripts\activate
cargo run
```
3. Start the backend server:
```bash
uvicorn app.main:app --reload
```
The backend will be available at http://localhost:8000
The backend will be available at http://localhost:3000
### Running the Frontend
@@ -143,21 +100,52 @@ yarn --version
2. Start the development server:
```bash
yarn dev:watch
npm run dev
```
The frontend will be available at http://localhost:5173
## Key Features
### Intelligent Content Organization
- **AI-Powered Tagging**: Automatic classification with geographic, topical, and source tags
- **Hierarchical Filtering**: Multi-level filtering by location (country → region → city), categories, and content types
- **Smart Search**: Advanced filtering with suggestions based on tag relationships and usage patterns
- **Legacy Migration**: Seamless upgrade from simple country-based filtering to comprehensive tag-based system
### Advanced Analytics
- **Reading Statistics**: Track reading time, completion rates, and engagement patterns
- **Content Analytics**: Source performance, tag usage, and trending topics analysis
- **Geographic Insights**: Location-based content distribution and reading preferences
- **Goal Tracking**: Personal reading goals with progress monitoring
### Flexible Article Display
- **Compact View**: Title, excerpt, tags, and action buttons for quick browsing
- **On-Demand Loading**: Full content, AI summaries, and source links as needed
- **Visual Tag System**: Color-coded, hierarchical tags with click-to-filter functionality
- **Reading Status**: Visual indicators for read/unread status and progress tracking
### Enhanced Sharing
- **Multiple Formats**: Text, Markdown, HTML, and JSON export options
- **Custom Templates**: User-configurable sharing formats
- **One-Click Operations**: Copy to clipboard with formatted content
- **Privacy Controls**: Configurable information inclusion in shared content
## Building for Production
### Building the Backend
### Building the Rust Backend
The backend can be deployed as a standard FastAPI application. You can use tools like Gunicorn with Uvicorn workers:
1. Navigate to the Rust backend directory:
```bash
cd backend-rust
```
```bash
pip install gunicorn
gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker
```
2. Build the optimized release binary:
```bash
cargo build --release
```
The binary will be available at `target/release/owly-news`
### Building the Frontend
@@ -168,32 +156,62 @@ gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker
2. Build the frontend:
```bash
yarn build
npm run build
```
The built files will be in the `dist` directory and can be served by any static file server.
## API Endpoints
## Development
The backend provides the following API endpoints:
### Code Quality
- `GET /news`: Get news articles with optional filtering
- `GET /meta/last_sync`: Get the timestamp of the last feed synchronization
- `POST /meta/cron`: Set the schedule for automatic feed synchronization
- `GET /meta/feeds`: List all configured feeds
- `POST /meta/feeds`: Add a new feed
- `DELETE /meta/feeds`: Delete a feed
- `GET /meta/model`: Check the status of the LLM model
- `POST /meta/sync`: Manually trigger a feed synchronization
The project includes comprehensive tooling for code quality:
## Environment Variables
**Frontend:**
- ESLint with Vue and TypeScript support
- Prettier for code formatting
- Vitest for testing
- TypeScript for type safety
- Oxlint for additional linting
### Backend
**Backend (Rust):**
- Standard Rust tooling (`cargo fmt`, `cargo clippy`)
- SQLx for compile-time checked SQL queries
- `OLLAMA_HOST`: URL for the Ollama service
- `CRON_HOURS`: Interval for scheduled news fetching in hours
- `DATABASE_URL`: SQLite database connection string
### Testing
## License
Run frontend tests:
```bash
cd frontend
npm run test
```
The code is licensed under the [PolyForm Noncommercial 1.0.0](https://polyformproject.org/licenses/noncommercial/1.0.0/) license. For any commercial use, please get in touch.
## Configuration
The application uses a comprehensive configuration system via `config.toml`:
- **AI Settings**: Configure Ollama integration for summaries and tagging
- **Display Preferences**: Default views, themes, and UI customization
- **Analytics**: Control data collection and retention policies
- **Filtering**: Smart suggestions, saved filters, and geographic hierarchy
- **Sharing**: Default formats and custom templates
See the example configuration in the project for detailed options.
## Migration from Legacy Systems
The application includes automatic migration tools for upgrading from simpler filtering systems:
- **Country Filter Migration**: Automatic conversion to hierarchical geographic tags
- **Data Preservation**: Maintains historical data during migration
- **Backward Compatibility**: Gradual transition with user control
- **Validation Tools**: Ensure data integrity throughout the migration process
## Future Roadmap
The project is evolving through three phases:
1. **Phase 1**: High-performance Rust backend with advanced filtering and analytics
2. **Phase 2**: CLI application for power users and automation
3. **Phase 3**: Migration to Dioxus for a full Rust stack
See `ROADMAP.md` for detailed development plans and architectural decisions.

2
backend-rust/.gitignore vendored Normal file

@@ -0,0 +1,2 @@
target/
/config.toml

2520
backend-rust/Cargo.lock generated Normal file

File diff suppressed because it is too large.

37
backend-rust/Cargo.toml Normal file

@@ -0,0 +1,37 @@
[workspace]
members = [
"crates/api",
"crates/server",
"crates/cli",
"crates/db",
]
resolver = "3"
[workspace.package]
edition = "2024"
version = "0.1.0"
rust-version = "1.89"
[workspace.dependencies]
anyhow = "1.0.99"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.142"
tokio = { version = "1.47.1", features = ["rt-multi-thread", "macros", "signal"] }
libloading = "0.8.8"
tracing = "0.1.41"
once_cell = "1.21.3"
toml = "0.9.5"
axum = "0.8.4"
sha2 = "0.10.9"
sqlx = { version = "0.8.6", default-features = false, features = ["runtime-tokio-rustls", "macros", "postgres", "uuid", "chrono", "migrate"] }
hex = "0.4.3"
num_cpus = "1.17.0"
unicode-segmentation = "1.12.0"
readability = "0.3.0"
tracing-subscriber = { version = "0.3.19", features = ["env-filter", "fmt"] }
scraper = "0.23.1"
dotenv = "0.15.0"
# dev/test utilities in the workspace
tokio-test = "0.4.4"
axum-test = "17.3.0"

584
backend-rust/ROADMAP.md Normal file

@@ -0,0 +1,584 @@
# Owly News Summariser - Project Roadmap
This document outlines the strategic approach for transforming the project through three phases: Python-to-Rust backend migration, CLI application addition, and Vue-to-Dioxus frontend migration.
## Project Structure Strategy
### Current Phase: Axum API Setup
```
owly-news-summariser/
├── src/
│ ├── main.rs # Entry point (will evolve)
│ ├── db.rs # Database connection & SQLx setup
│ ├── api.rs # API module declaration
│ ├── api/ # API-specific modules (no mod.rs needed)
│ │ ├── routes.rs # Route definitions
│ │ ├── middleware.rs # Custom middleware
│ │ └── handlers.rs # Request handlers & business logic
│ ├── models.rs # Models module declaration
│ ├── models/ # Data models & database entities
│ │ ├── user.rs
│ │ ├── article.rs
│ │ ├── summary.rs
│ │ ├── tag.rs # Tag models and relationships
│ │ ├── analytics.rs # Analytics and statistics models
│ │ └── settings.rs # User settings and preferences
│ ├── services.rs # Services module declaration
│ ├── services/ # Business logic layer
│ │ ├── news_service.rs
│ │ ├── summary_service.rs
│ │ ├── scraping_service.rs # Article content extraction
│ │ ├── tagging_service.rs # AI-powered tagging
│ │ ├── analytics_service.rs # Reading stats and analytics
│ │ └── sharing_service.rs # Article sharing functionality
│ └── config.rs # Configuration management
├── migrations/ # SQLx migrations (managed by SQLx CLI)
├── frontend/ # Keep existing Vue frontend for now
├── config.toml # Configuration file with AI settings
└── Cargo.toml
```
### Phase 2: Multi-Binary Structure (API + CLI)
```
owly-news-summariser/
├── src/
│ ├── lib.rs # Shared library code
│ ├── bin/
│ │ ├── server.rs # API server binary
│ │ └── cli.rs # CLI application binary
│ ├── [same module structure as Phase 1]
├── migrations/
├── frontend/
├── completions/ # Shell completion scripts
│ ├── owly.bash
│ ├── owly.zsh
│ └── owly.fish
├── config.toml
└── Cargo.toml # Updated for multiple binaries
```
### Phase 3: Full Rust Stack
```
owly-news-summariser/
├── src/
│ ├── [same structure as Phase 2]
├── migrations/
├── frontend-dioxus/ # New Dioxus frontend
├── frontend/ # Legacy Vue (to be removed)
├── completions/
├── config.toml
└── Cargo.toml
```
## Core Features & Architecture
### Article Processing & Display Workflow
**Hybrid Approach: RSS Feeds + Manual Submissions with Smart Content Management**
1. **Article Collection**
- RSS feed monitoring and batch processing
- Manual article URL submission
- Store original content and metadata in database
2. **Content Processing Pipeline**
- Fetch RSS articles → scrape full content → store in DB
- **Compact Article Display**:
- Title (primary display)
- RSS description text
- Tags (visual indicators)
- Time posted (from RSS)
- Time added (when added to system)
- Action buttons: [Full Article] [Summary] [Source]
- **On-Demand Content Loading**:
- Full Article: Display complete scraped content
- Summary: Show AI-generated summary
- Source: Open original URL in new tab
- Background async processing with status updates
- Support for re-processing without re-fetching
3. **Intelligent Tagging System**
- **Automatic Tag Generation**: AI analyzes content and assigns relevant tags
- **Geographic & Source Tags**: AI-generated location tags (countries, regions, cities) and publication source tags
- **Content Category Tags**: Technology, Politics, Business, Sports, Health, etc.
- **Visual Tag Display**: Color-coded tags in compact article view with hierarchical display
- **Tag Filtering**: Quick filtering by clicking tags with smart suggestions
- **Custom Tags**: User-defined tags and categories
- **Tag Confidence**: Visual indicators for AI vs manual tags
- **Tag Migration**: Automatic conversion of legacy country filters to geographic tags
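A minimal sketch of how hierarchical tag filtering could be resolved in SQLite with a recursive CTE (the `tags(id, parent_id)` columns follow the schema in item 8 below; the function name and sqlx usage are illustrative):
```rust
use sqlx::{Sqlite, SqlitePool};

/// Collect a tag and all of its descendants, e.g. a country tag plus its
/// regions and cities, so filtering on "Germany" also matches "Berlin".
async fn descendant_tag_ids(pool: &SqlitePool, root: i64) -> anyhow::Result<Vec<i64>> {
    let ids = sqlx::query_scalar::<Sqlite, i64>(
        r#"
        WITH RECURSIVE subtags(id) AS (
            SELECT id FROM tags WHERE id = ?
            UNION ALL
            SELECT t.id FROM tags t JOIN subtags s ON t.parent_id = s.id
        )
        SELECT id FROM subtags
        "#,
    )
    .bind(root)
    .fetch_all(pool)
    .await?;
    Ok(ids)
}
```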
4. **Analytics & Statistics System**
- **Reading Analytics**:
- Articles read vs added
- Reading time tracking
- Most read categories and tags
- Reading patterns over time
- **Content Analytics**:
- Source reliability and quality metrics
- Tag usage statistics
- Processing success rates
- Content freshness tracking
- **Performance Metrics**:
- AI processing times
- Scraping success rates
- User engagement patterns
5. **Advanced Filtering System**
- **Multi-Criteria Filtering**:
- By tags (single or multiple with AND/OR logic)
- By geographic tags (country, region, city with hierarchical filtering)
- By content categories and topics
- By date ranges (posted, added, read)
- By processing status (pending, completed, failed)
- By content availability (scraped, summary, RSS-only)
- By read/unread status
- **Smart Filter Migration**: Automatic conversion of legacy country filters to tag-based equivalents
- **Saved Filter Presets**:
- Custom filter combinations
- Quick access to frequent searches
- Geographic preset templates (e.g., "European Tech News", "US Politics")
- **Smart Suggestions**: Filter suggestions based on usage patterns and tag relationships
6. **Settings & Management System**
- **User Preferences**:
- Default article view mode
- Tag display preferences with geographic hierarchy settings
- Reading tracking settings
- Notification preferences
- **System Settings**:
- AI configuration (via API and config file)
- Processing settings
- Display customization
- Export preferences
- **Content Management**:
- Bulk operations (mark read, delete, retag)
- Archive old articles
- Export/import functionality
- Legacy data migration tools
7. **Article Sharing System**
- **Multiple Share Formats**:
- Clean text format with title, summary, and source link
- Markdown format for developers
- Rich HTML format for email/web
- JSON format for API integration
- **Copy to Clipboard**: One-click formatted sharing
- **Share Templates**: Customizable sharing formats
- **Privacy Controls**: Control what information is included in shares
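As a sketch of the template idea, share formats like the ones configured later in this document can be rendered with simple placeholder substitution (the field set here is illustrative):
```rust
/// Replace `{key}` placeholders in a share template with concrete values.
fn render_template(template: &str, fields: &[(&str, &str)]) -> String {
    let mut out = template.to_string();
    for (key, value) in fields {
        out = out.replace(&format!("{{{key}}}"), value);
    }
    out
}

fn main() {
    let text = render_template(
        "📰 {title}\n{summary}\n🔗 Source: {url}",
        &[("title", "Example"), ("summary", "..."), ("url", "https://example.com")],
    );
    println!("{text}");
}
```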
8. **Database Schema**
```
Articles: id, title, url, source_type, rss_content, full_content,
summary, processing_status, published_at, added_at, read_at,
read_count, reading_time, ai_enabled, created_at, updated_at
Tags: id, name, category, description, color, usage_count, parent_id, created_at
ArticleTags: article_id, tag_id, confidence_score, ai_generated, created_at
ReadingStats: user_id, article_id, read_at, reading_time, completion_rate
FilterPresets: id, name, filter_criteria, user_id, created_at
Settings: key, value, category, user_id, updated_at
ShareTemplates: id, name, format, template_content, created_at
LegacyMigration: old_filter_type, old_value, new_tag_ids, migrated_at
```
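For illustration, an `Articles` row could map onto a Rust struct roughly like this (the `sqlx::FromRow` derive and the concrete field types are assumptions; the timestamp columns could equally be `chrono` types):
```rust
use sqlx::FromRow;

#[derive(Debug, FromRow)]
pub struct Article {
    pub id: i64,
    pub title: String,
    pub url: String,
    pub source_type: String,       // "rss" | "manual"
    pub rss_content: Option<String>,
    pub full_content: Option<String>,
    pub summary: Option<String>,
    pub processing_status: String, // "pending" | "processing" | "completed" | "failed"
    pub published_at: String,
    pub added_at: String,
    pub read_at: Option<String>,
    pub read_count: i64,
    pub reading_time: Option<i64>, // seconds
    pub ai_enabled: bool,
    pub created_at: String,
    pub updated_at: String,
}
```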
## Step-by-Step Process
### Phase 1: Axum API Implementation
**Step 1: Core Infrastructure Setup**
- Set up database connection pooling with SQLx
- **Enhanced Configuration System**:
- Extend config.toml with comprehensive settings
- AI provider configurations with separate summary/tagging settings
- Display preferences and UI customization
- Analytics and tracking preferences
- Sharing templates and formats
- Filter and search settings
- Geographic tagging preferences
- Establish error handling patterns with `anyhow`
- Set up logging and analytics infrastructure
**Step 2: Data Layer**
- Design comprehensive database schema with analytics and settings support
- Create SQLx migrations for all tables including analytics and user preferences
- Implement hierarchical tag system with geographic and content categories
- Add legacy migration support for country filters
- Implement article models with reading tracking and statistics
- Add settings and preferences data layer
- Create analytics data models and aggregation queries
- Implement sharing templates and format management
- Use SQLx's compile-time checked queries
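As a small example of what a compile-time checked query could look like, `sqlx::query_scalar!` verifies the SQL against the actual schema at build time (this requires `DATABASE_URL` at compile time or an offline query cache; the function itself is illustrative):
```rust
/// URLs of articles still waiting to be scraped or summarized.
async fn pending_article_urls(pool: &sqlx::SqlitePool) -> anyhow::Result<Vec<String>> {
    let urls = sqlx::query_scalar!(
        "SELECT url FROM articles WHERE processing_status = 'pending'"
    )
    .fetch_all(pool)
    .await?;
    Ok(urls)
}
```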
**Step 3: Enhanced Services Layer**
- **Content Processing Services**:
- RSS feed fetching and parsing
- Web scraping with quality tracking
- AI services for summary and tagging
- **Enhanced Tagging Service**:
- Geographic location detection and tagging
- Content category classification
- Hierarchical tag relationships
- Legacy filter migration logic
- **Analytics Service**:
- Reading statistics collection and aggregation
- Content performance metrics
- User behavior tracking
- Trend analysis and insights
- **Settings Management Service**:
- User preference handling
- System configuration management
- Real-time settings updates
- **Sharing Service**:
- Multiple format generation
- Template processing
- Privacy-aware content filtering
- **Advanced Filtering Service**:
- Complex query building with geographic hierarchy
- Filter preset management
- Search optimization
- Legacy filter migration
**Step 4: Comprehensive API Layer**
- **Article Management Routes**:
- `GET /api/articles` - List articles with compact display data
- `POST /api/articles` - Submit manual article URL
- `GET /api/articles/:id` - Get basic article info
- `GET /api/articles/:id/full` - Get complete scraped content
- `GET /api/articles/:id/summary` - Get AI summary
- `POST /api/articles/:id/read` - Mark as read and track reading time
- `POST /api/articles/:id/share` - Generate shareable content
- **Analytics Routes**:
- `GET /api/analytics/dashboard` - Main analytics dashboard data
- `GET /api/analytics/reading-stats` - Personal reading statistics
- `GET /api/analytics/content-stats` - Content and source analytics
- `GET /api/analytics/trends` - Trending topics and patterns
- `GET /api/analytics/export` - Export analytics data
- **Enhanced Filtering & Search Routes**:
- `GET /api/filters/presets` - Get saved filter presets
- `POST /api/filters/presets` - Save new filter preset
- `GET /api/search/suggestions` - Get search and filter suggestions
- `POST /api/search` - Advanced search with multiple criteria
- `POST /api/filters/migrate` - Migrate legacy country filters to tags
- **Settings Routes**:
- `GET /api/settings` - Get all user settings
- `PUT /api/settings` - Update user settings
- `GET /api/settings/system` - Get system configuration
- `PUT /api/settings/system` - Update system settings (admin)
- **Enhanced Tag Management Routes**:
- `GET /api/tags` - List tags with usage statistics and hierarchy
- `GET /api/tags/geographic` - Get geographic tag hierarchy
- `GET /api/tags/trending` - Get trending tags
- `POST /api/tags/:id/follow` - Follow/unfollow tag for notifications
- `GET /api/tags/categories` - Get tag categories and relationships
- **Sharing Routes**:
- `GET /api/share/templates` - Get sharing templates
- `POST /api/share/templates` - Create custom sharing template
- `POST /api/articles/:id/share/:format` - Generate share content
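A sketch of how a few of these routes could be registered with Axum (handler bodies are stubs; note that axum 0.8, which this workspace pins, writes path parameters as `{id}` rather than `:id`):
```rust
use axum::{
    extract::{Path, State},
    routing::get,
    Json, Router,
};
use serde_json::{json, Value};
use sqlx::SqlitePool;

async fn list_articles(State(_pool): State<SqlitePool>) -> Json<Value> {
    // TODO: query compact display data for the article list
    Json(json!({ "articles": [] }))
}

async fn article_summary(Path(id): Path<i64>, State(_pool): State<SqlitePool>) -> Json<Value> {
    // TODO: load the AI summary for the given article
    Json(json!({ "id": id, "summary": null }))
}

fn api_routes() -> Router<SqlitePool> {
    Router::new()
        .route("/api/articles", get(list_articles))
        .route("/api/articles/{id}/summary", get(article_summary))
}
```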
**Step 5: Enhanced Frontend Features**
- **Compact Article Display**:
- Card-based layout with title, RSS excerpt, tags, and timestamps
- Action buttons for Full Article, Summary, and Source
- Hierarchical tag display with geographic and category indicators
- Reading status and progress indicators
- **Advanced Analytics Dashboard**:
- Reading statistics with charts and trends
- Content source performance metrics
- Tag usage and trending topics with geographic breakdowns
- Personal reading insights and goals
- **Comprehensive Filtering Interface**:
- Multi-criteria filter builder with geographic hierarchy
- Saved filter presets with quick access
- Smart filter suggestions based on tag relationships
- Visual filter indicators and clear actions
- Legacy filter migration interface
- **Settings Management Panel**:
- User preference configuration
- AI and processing settings
- Display and UI customization
- Export/import functionality
- **Enhanced Sharing System**:
- Quick share buttons with format selection
- Copy-to-clipboard functionality
- Custom sharing templates
- Preview before sharing
**Step 6: Integration & Testing**
- Test all API endpoints with comprehensive coverage
- Test analytics collection and aggregation
- Test enhanced filtering and search functionality
- Test legacy filter migration
- Validate settings persistence and real-time updates
- Test sharing functionality across different formats
- Performance testing with large datasets and hierarchical tags
- Deploy and monitor
### Phase 2: CLI Application Addition
**Step 1: Restructure for Multiple Binaries**
- Move API code to `src/bin/server.rs`
- Create `src/bin/cli.rs` for CLI application
- Keep shared logic in `src/lib.rs`
- Update Cargo.toml to support multiple binaries
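Cargo discovers binaries in `src/bin/` automatically, so each entry point can stay a thin wrapper over the shared library. A sketch (the library crate name `owly_news` and the `start_server` signature are assumptions here):
```rust
// src/bin/server.rs: thin entry point over the shared library in src/lib.rs.
use std::net::SocketAddr;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let addr: SocketAddr = "127.0.0.1:8090".parse()?;
    // Reuse the shared server startup logic from the library crate.
    owly_news::start_server(addr).await
}
```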
**Step 2: Enhanced CLI with Analytics and Management**
- **Core Commands**:
- `owly list [--filters] [--format table|json|compact]` - List articles
- `owly show <id> [--content|--summary]` - Display specific article
- `owly read <id>` - Mark article as read and open in pager
- `owly open <id>` - Open source URL in browser
- **Analytics Commands**:
- `owly stats [--period day|week|month|year]` - Show reading statistics
- `owly trends [--tags|--sources|--topics|--geo]` - Display trending content
- `owly analytics export [--format csv|json]` - Export analytics data
- **Management Commands**:
- `owly settings [--get key] [--set key=value]` - Manage settings
- `owly filters [--list|--save name|--load name]` - Manage filter presets
- `owly cleanup [--old|--unread|--failed]` - Clean up articles
- `owly migrate [--from-country-filters]` - Migrate legacy data
- **Enhanced Filtering Commands**:
- `owly filter [--tag] [--geo] [--category]` - Advanced filtering with geographic support
- `owly tags [--list|--hierarchy|--geo]` - Tag management with geographic display
- **Sharing Commands**:
- `owly share <id> [--format text|markdown|html]` - Generate share content
- `owly export <id> [--template name] [--output file]` - Export article
**Step 3: Advanced CLI Features**
- Interactive filtering and search with geographic hierarchy
- Real-time analytics display with charts (using ASCII graphs)
- Bulk operations with progress indicators
- Settings management with validation
- Shell completion for all commands and parameters
- Legacy data migration tools
### Phase 3: Dioxus Frontend Migration
**Step 1: Component Architecture**
- **Core Display Components**:
- `ArticleCard` - Compact article display with action buttons
- `ArticleViewer` - Full article content display
- `SummaryViewer` - AI summary display
- `TagCloud` - Interactive tag display with geographic hierarchy
- `GeographicMap` - Visual geographic filtering interface
- **Analytics Components**:
- `AnalyticsDashboard` - Main analytics overview
- `ReadingStats` - Personal reading statistics
- `TrendChart` - Trending topics and patterns
- `ContentMetrics` - Source and content analytics
- `GeographicAnalytics` - Location-based content insights
- **Enhanced Filtering Components**:
- `FilterBuilder` - Advanced filter creation interface with geographic support
- `FilterPresets` - Saved filter management
- `SearchBar` - Smart search with suggestions
- `GeographicFilter` - Hierarchical location filtering
- `MigrationTool` - Legacy filter migration interface
- **Settings Components**:
- `SettingsPanel` - User preference management
- `SystemConfig` - System-wide configuration
- `ExportImport` - Data export/import functionality
- **Sharing Components**:
- `ShareDialog` - Sharing interface with format options
- `ShareTemplates` - Custom template management
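As a rough idea of the component style, an `ArticleCard` in Dioxus (0.5+ `rsx!` syntax) might look like the following; the props and class names are illustrative:
```rust
use dioxus::prelude::*;

#[component]
fn ArticleCard(title: String, excerpt: String, tags: Vec<String>) -> Element {
    rsx! {
        article { class: "article-card",
            h2 { "{title}" }
            p { "{excerpt}" }
            div { class: "tags",
                // Render each tag as a clickable visual indicator.
                for tag in tags {
                    span { class: "tag", "{tag}" }
                }
            }
        }
    }
}
```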
**Step 2: Enhanced UX Features**
- **Smart Article Display**:
- Lazy loading for performance
- Infinite scroll with virtualization
- Quick preview on hover
- Keyboard navigation support
- **Advanced Analytics**:
- Interactive charts and graphs with geographic data
- Customizable dashboard widgets
- Goal setting and progress tracking
- Comparison and trend analysis
- **Intelligent Filtering**:
- Auto-complete for filters with geographic suggestions
- Visual filter builder with map integration
- Filter combination suggestions based on tag relationships
- Saved search notifications
- **Seamless Sharing**:
- One-click sharing with clipboard integration
- Live preview of shared content
- Social media format optimization
- Batch sharing capabilities
## Key Strategic Considerations
### 1. Performance & Scalability
- **Efficient Data Loading**: Lazy loading and pagination for large datasets
- **Optimized Queries**: Indexed database queries for filtering and analytics with hierarchical tag support
- **Caching Strategy**: Smart caching for frequently accessed content and tag hierarchies
- **Real-time Updates**: WebSocket integration for live analytics
### 2. User Experience Focus
- **Progressive Disclosure**: Show essential info first, details on demand
- **Responsive Design**: Optimized for mobile and desktop
- **Accessibility**: Full keyboard navigation and screen reader support
- **Customization**: User-configurable interface and behavior
- **Smooth Migration**: Seamless transition from country-based to tag-based filtering
### 3. Analytics & Insights
- **Privacy-First**: User control over data collection and retention
- **Actionable Insights**: Meaningful statistics that guide reading habits
- **Performance Metrics**: System health and efficiency tracking
- **Trend Analysis**: Pattern recognition for content and behavior with geographic context
### 4. Content Management
- **Flexible Display**: Multiple view modes for different use cases
- **Smart Organization**: AI-assisted content categorization with geographic awareness
- **Bulk Operations**: Efficient management of large article collections
- **Data Integrity**: Reliable content processing and error handling
- **Legacy Support**: Smooth migration from existing country-based filtering
## Enhanced Configuration File Structure
```toml
[server]
host = '127.0.0.1'
port = 8090
[display]
default_view = "compact" # compact, full, summary
articles_per_page = 50
show_reading_time = true
show_word_count = false
highlight_unread = true
theme = "auto" # light, dark, auto
[analytics]
enabled = true
track_reading_time = true
track_scroll_position = true
retention_days = 365 # How long to keep detailed analytics
aggregate_older_data = true
[filtering]
enable_smart_suggestions = true
max_recent_filters = 10
auto_save_filters = true
default_sort = "added_desc" # added_desc, published_desc, title_asc
enable_geographic_hierarchy = true
auto_migrate_country_filters = true
[sharing]
default_format = "text"
include_summary = true
include_tags = true
include_source = true
copy_to_clipboard = true
[sharing.templates.text]
format = """
📰 {title}
{summary}
🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}
Shared via Owly News Summariser
"""
[sharing.templates.markdown]
format = """
# {title}
{summary}
**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}
---
*Shared via Owly News Summariser*
"""
[ai]
enabled = true
provider = "ollama"
timeout_seconds = 120
[ai.summary]
enabled = true
temperature = 0.1
max_tokens = 1000
[ai.tagging]
enabled = true
temperature = 0.3
max_tokens = 200
max_tags_per_article = 10
min_confidence_threshold = 0.7
enable_geographic_tagging = true
enable_category_tagging = true
geographic_hierarchy_levels = 3 # country, region, city
[scraping]
timeout_seconds = 30
max_retries = 3
max_content_length = 50000
respect_robots_txt = true
rate_limit_delay_ms = 1000
[processing]
batch_size = 10
max_concurrent = 5
retry_attempts = 3
priority_manual = true
auto_mark_read_on_view = false
[migration]
auto_convert_country_filters = true
preserve_legacy_data = true
migration_batch_size = 100
[cli]
default_output = "table"
pager_command = "less"
show_progress = true
auto_confirm_bulk = false
show_geographic_hierarchy = true
```
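This configuration maps naturally onto serde structs; a minimal sketch covering just the `[server]` and `[display]` sections (struct and function names are illustrative):
```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct Config {
    server: Server,
    display: Display,
}

#[derive(Debug, Deserialize)]
struct Server {
    host: String,
    port: u16,
}

#[derive(Debug, Deserialize)]
struct Display {
    default_view: String,
    articles_per_page: u32,
}

fn load_config(path: &str) -> anyhow::Result<Config> {
    let raw = std::fs::read_to_string(path)?;
    Ok(toml::from_str(&raw)?)
}
```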
## Migration Strategy for Country-Based Filtering
### Automatic Migration Process
1. **Data Analysis**: Scan existing country filter data and RSS feed origins
2. **Tag Generation**: Create geographic tags for each country with hierarchical structure
3. **Filter Conversion**: Convert country-based filters to tag-based equivalents
4. **User Notification**: Inform users about the migration and new capabilities
5. **Gradual Rollout**: Maintain backward compatibility during transition period
### Enhanced Geographic Features
- **Hierarchical Display**: Country → Region → City tag hierarchy
- **Visual Map Integration**: Interactive geographic filtering via map interface
- **Smart Suggestions**: Related location and content suggestions
- **Multi-Level Filtering**: Filter by specific cities, regions, or broader geographic areas
- **Source Intelligence**: AI detection of article geographic relevance beyond RSS origin
## Future Enhancements (Post Phase 3)
### Advanced Analytics
- **Machine Learning Insights**: Content recommendation based on reading patterns and geographic preferences
- **Predictive Analytics**: Trending topic prediction with geographic context
- **Behavioral Analysis**: Reading habit optimization suggestions
- **Comparative Analytics**: Benchmark against reading goals and regional averages
### Enhanced Content Management
- **Smart Collections**: AI-curated article collections with geographic themes
- **Reading Lists**: Planned reading with progress tracking
- **Content Relationships**: Related article suggestions with geographic relevance
- **Advanced Search**: Full-text search with relevance scoring and geographic weighting
### Social & Collaboration Features
- **Reading Groups**: Shared reading lists and discussions with geographic focus
- **Social Sharing**: Integration with social platforms
- **Collaborative Tagging**: Community-driven content organization
- **Reading Challenges**: Gamification of reading habits with geographic themes
### Integration & Extensibility
- **Browser Extension**: Seamless article saving and reading
- **Mobile Apps**: Native iOS/Android applications with location awareness
- **API Ecosystem**: Third-party integrations and plugins
- **Webhook System**: Real-time notifications and integrations with geographic filtering

72
backend-rust/TODO.md Normal file

@@ -0,0 +1,72 @@
## CPU and resource limiting
- Tokio worker threads
- Decide thread policy:
- Option A: set TOKIO_WORKER_THREADS in the environment for deployments.
- Option B: build a custom runtime with tokio::runtime::Builder::new_multi_thread().worker_threads(n).
- Document your default policy (e.g., 50% of physical cores).
- Concurrency guard for CPU-heavy tasks
- Create a global tokio::sync::Semaphore with N permits (N = allowed concurrent heavy tasks).
- Acquire a permit before invoking heavy module operations; release automatically on drop.
- Expose the semaphore in app state so handlers/jobs can share it (a sketch follows at the end of this section).
- HTTP backpressure and rate limiting (if using API)
- Add tower::limit::ConcurrencyLimitLayer to cap in-flight requests.
- Add tower::limit::RateLimitLayer or request-size/timeouts as needed.
- Optionally add tower::timeout::TimeoutLayer to bound handler latency.
- Stronger isolation (optional, later)
- Evaluate running certain modules as separate processes for strict CPU caps.
- Use cgroups v2 (Linux) or Job Objects (Windows) to bound CPU/memory per process.
- Reuse the same JSON interface over IPC (e.g., stdio or a local socket).
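A minimal sketch of the worker-thread and semaphore ideas above (the permit policy and function names are illustrative; `num_cpus` is already a workspace dependency):
```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

// Option B from above: build a runtime with an explicit worker-thread count.
fn build_runtime(workers: usize) -> std::io::Result<tokio::runtime::Runtime> {
    tokio::runtime::Builder::new_multi_thread()
        .worker_threads(workers)
        .enable_all()
        .build()
}

// Global guard for CPU-heavy work: N permits = N concurrent heavy tasks.
fn build_cpu_semaphore() -> Arc<Semaphore> {
    let permits = (num_cpus::get_physical() / 2).max(1); // default policy: 50% of cores
    Arc::new(Semaphore::new(permits))
}

// Acquire a permit, then run the CPU-bound closure off the async workers.
// The permit is released automatically when `_permit` drops.
async fn run_heavy<F, T>(sem: Arc<Semaphore>, work: F) -> anyhow::Result<T>
where
    F: FnOnce() -> T + Send + 'static,
    T: Send + 'static,
{
    let _permit = sem.acquire_owned().await?;
    Ok(tokio::task::spawn_blocking(work).await?)
}
```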
## Build and run
- Build all crates
- Run: cargo build --workspace
- Build each plugin as a cdylib (a loading sketch follows at the end of this section)
- Example: cd crates/modules/summarizer && cargo build --release
- Stage plugin libraries for the host to find
- Create a modules directory the daemon will read, e.g. target/modules
- Copy the built artifact into that directory:
- Linux: copy target/release/libsummarizer.so -> target/modules/libsummarizer.so
- macOS: copy target/release/libsummarizer.dylib -> target/modules/libsummarizer.dylib
- Windows: copy target/release/summarizer.dll -> target/modules/summarizer.dll
- Alternatively set OWLY_MODULES_DIR to your chosen directory.
- Run the daemon
- cargo run -p owly-news
- Optionally set:
- OWLY_MODULES_DIR=/absolute/path/to/modules
- TOKIO_WORKER_THREADS=N
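For the dynamic loading itself, a minimal `libloading` sketch (the exported symbol name and signature are illustrative assumptions; `libloading` is already a workspace dependency):
```rust
use libloading::{Library, Symbol};

fn load_module(path: &str) -> anyhow::Result<Library> {
    // SAFETY: loading a dynamic library runs its initializers; only load
    // artifacts staged into the trusted modules directory described above.
    let lib = unsafe { Library::new(path)? };
    // Resolve an exported symbol to verify this is a compatible module.
    // The symbol name `module_entry` is a placeholder, not the real ABI.
    let _entry: Symbol<unsafe extern "C" fn()> = unsafe { lib.get(b"module_entry")? };
    Ok(lib)
}
```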
## Wire into the API
- Share ModuleHost in app state
- Create a struct AppState { host: Arc<ModuleHost>, cpu_sem: Arc<Semaphore>, ... }.
- Add AppState to Axum with .with_state(state).
- In a handler (example: POST /summarize; sketched at the end of this section)
- Parse payload as JSON.
- Acquire a permit from cpu_sem before heavy work.
- host.get("summarizer").await? to lazily load the module.
- Call module.invoke_json("summarize", payload_value)?.
- Map success to 200 with JSON; map errors to appropriate status codes.
- Error handling and observability
- Use thiserror/anyhow to classify operational vs. client errors.
- Add tracing spans around module loading and invocation; include module name and op.
- Return structured error JSON when module reports an error.
- Configuration
- Decide env vars and defaults: OWLY_MODULES_DIR, TOKIO_WORKER_THREADS, concurrency permits, rate limits.
- Optionally add a config file (toml) and load via figment or config crate.
- Health and lifecycle
- Add a /health route that checks:
- Tokio is responsive.
- Optional: preflight-check that required modules are present (or skip to keep lazy).
- Graceful shutdown: listen for SIGINT/SIGTERM and drain in-flight requests before exit.
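Pulling these pieces together, a sketch of the `/summarize` handler described above; `ModuleHost` and `invoke_json` are stubbed here because their real definitions live in the module-host crate:
```rust
use std::sync::Arc;

use axum::{extract::State, http::StatusCode, routing::post, Json, Router};
use serde_json::Value;
use tokio::sync::Semaphore;

// Stubs standing in for the real module host types.
struct ModuleHost;
struct Module;

impl ModuleHost {
    async fn get(&self, _name: &str) -> anyhow::Result<Module> {
        Ok(Module)
    }
}

impl Module {
    fn invoke_json(&self, _op: &str, payload: Value) -> anyhow::Result<Value> {
        Ok(payload)
    }
}

#[derive(Clone)]
struct AppState {
    host: Arc<ModuleHost>,
    cpu_sem: Arc<Semaphore>,
}

async fn summarize(
    State(state): State<AppState>,
    Json(payload): Json<Value>,
) -> Result<Json<Value>, (StatusCode, String)> {
    // Bound concurrent heavy work before touching the module.
    let _permit = state
        .cpu_sem
        .clone()
        .acquire_owned()
        .await
        .map_err(|e| (StatusCode::SERVICE_UNAVAILABLE, e.to_string()))?;
    // Lazily load the module, then invoke the operation with the JSON payload.
    let module = state
        .host
        .get("summarizer")
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    let out = module
        .invoke_json("summarize", payload)
        .map_err(|e| (StatusCode::UNPROCESSABLE_ENTITY, e.to_string()))?;
    Ok(Json(out))
}

fn app(state: AppState) -> Router {
    Router::new().route("/summarize", post(summarize)).with_state(state)
}
```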

7
backend-rust/crates/api/Cargo.lock generated Normal file

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "api"
version = "0.1.0"

backend-rust/crates/api/Cargo.toml Normal file

@@ -0,0 +1,18 @@
[package]
name = "api"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
once_cell = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
async-trait = "0.1.89"
axum = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }
[features]
default = []

backend-rust/crates/api/src/api.rs Normal file

@@ -0,0 +1,3 @@
pub mod handlers;
pub mod middleware;
pub mod routes;

backend-rust/crates/api/src/api/handlers.rs Normal file

@@ -0,0 +1,41 @@
use axum::Json;
use axum::extract::State;
use axum::http::StatusCode;
use axum::response::{IntoResponse, Response};
use serde_json::Value;
use sqlx::SqlitePool;

pub async fn get_articles(State(_pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Article logic
    Ok(Json(serde_json::json!({"articles": []})))
}

pub async fn get_summaries(State(_pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Summaries logic
    Ok(Json(serde_json::json!({"summaries": []})))
}

/// Wrapper that turns any `anyhow`-compatible error into an HTTP response.
pub struct AppError(anyhow::Error);

impl IntoResponse for AppError {
    fn into_response(self) -> Response {
        // Differentiate database errors from everything else for clearer messages.
        let (status, message) = match self.0.downcast_ref::<sqlx::Error>() {
            Some(_) => (StatusCode::INTERNAL_SERVER_ERROR, "Database error occurred"),
            None => (StatusCode::INTERNAL_SERVER_ERROR, "An error occurred"),
        };
        tracing::error!("API Error: {:?}", self.0);
        (status, message).into_response()
    }
}

impl<E> From<E> for AppError
where
    E: Into<anyhow::Error>,
{
    fn from(err: E) -> Self {
        Self(err.into())
    }
}

backend-rust/crates/api/src/api/routes.rs Normal file

@@ -0,0 +1,11 @@
use axum::Router;
use axum::routing::get;
use sqlx::SqlitePool;
use crate::api::handlers;
pub fn routes() -> Router<SqlitePool> {
    Router::new()
        .route("/articles", get(handlers::get_articles))
        .route("/summaries", get(handlers::get_summaries))
    // Add more routes as needed
}

backend-rust/crates/api/src/config.rs (file diff suppressed because it is too large)

backend-rust/crates/api/src/lib.rs Normal file

@@ -0,0 +1,6 @@
//! API-first core: shared types, DTOs, service traits, configuration.
pub mod config;
pub mod types;
pub mod services;
pub mod api;

backend-rust/crates/api/src/services.rs Normal file

@@ -0,0 +1,28 @@
use crate::types::Health;
use async_trait::async_trait;
// Submodules that host various domain services. These were refactored from the
// legacy root src folder into this workspace crate. Each component is its own module file.
pub mod summary_service;
pub mod news_service;
pub mod scraping_service;
pub mod tagging_service;
pub mod analytics_service;
pub mod sharing_service;
pub(crate) mod content_processor;
// Implement your service traits here. Example:
#[async_trait]
pub trait HealthService: Send + Sync {
    async fn health(&self) -> Health;
}

// A trivial default implementation that can be used by server and tests.
pub struct DefaultHealthService;

#[async_trait]
impl HealthService for DefaultHealthService {
    async fn health(&self) -> Health {
        Health { status: "ok".into() }
    }
}

backend-rust/crates/api/src/services/analytics_service.rs Normal file

@@ -0,0 +1,4 @@
//! Analytics service module.
//! Implement logic for tracking and aggregating analytics here.
// Placeholder for analytics-related types and functions.

backend-rust/crates/api/src/services/content_processor.rs Normal file

@@ -0,0 +1,3 @@
//! Content processor utilities shared by services.
// Placeholder module for content processing helpers (e.g., cleaning, tokenization).

backend-rust/crates/api/src/services/news_service.rs Normal file

@@ -0,0 +1,4 @@
//! News service module.
//! Implement logic related to news retrieval/management here.
// Placeholder for news-related types and functions.

backend-rust/crates/api/src/services/scraping_service.rs Normal file

@@ -0,0 +1,4 @@
//! Scraping service module.
//! Implement logic related to web scraping, fetchers, and extractors here.
// Placeholder for scraping-related types and functions.

backend-rust/crates/api/src/services/sharing_service.rs Normal file

@@ -0,0 +1,4 @@
//! Sharing service module.
//! Implement logic related to content sharing here.
// Placeholder for sharing-related types and functions.

backend-rust/crates/api/src/services/summary_service.rs Normal file

@@ -0,0 +1,4 @@
//! Summary service module.
//! Implement logic for generating summaries from articles here.
// Placeholder for summary-related types and functions.

backend-rust/crates/api/src/services/tagging_service.rs Normal file

@@ -0,0 +1,4 @@
//! Tagging service module.
//! Implement logic related to tagging articles and managing tags here.
// Placeholder for tagging-related types and functions.

backend-rust/crates/api/src/types.rs Normal file

@@ -0,0 +1,6 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Health {
    pub status: String,
}

backend-rust/crates/cli/Cargo.toml Normal file

@@ -0,0 +1,15 @@
[package]
name = "cli"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
dotenv = { workspace = true }
api = { path = "../api" }
server = { path = "../server" }

backend-rust/crates/cli/src/main.rs Normal file

@@ -0,0 +1,70 @@
use anyhow::Result;
use api::config::Cli;
use dotenv::dotenv;
use std::{env, net::SocketAddr, str::FromStr};
use tokio::signal;
#[tokio::main]
async fn main() -> Result<()> {
    dotenv().ok();
    let args: Vec<String> = env::args().collect();
    match args.get(1).map(|s| s.as_str()) {
        Some("serve") => serve(args).await,
        Some("print-config") => print_config(),
        _ => {
            print_help();
            Ok(())
        }
    }
}

fn print_help() {
    eprintln!(
        "Usage:
cli serve [--addr 0.0.0.0:8080]
cli print-config
Environment:
These may influence runtime behavior.
Notes:
- 'serve' runs the HTTP server.
- 'print-config' prints the default CLI configuration in JSON."
    );
}

async fn serve(args: Vec<String>) -> Result<()> {
    // naive flag parse: look for "--addr host:port"
    let mut addr: SocketAddr = SocketAddr::from_str("127.0.0.1:8080")?;
    let mut i = 2;
    while i + 1 < args.len() {
        if args[i] == "--addr" {
            addr = SocketAddr::from_str(&args[i + 1])?;
            i += 2;
        } else {
            i += 1;
        }
    }
    let server_task = tokio::spawn(async move { server::start_server(addr).await });
    // graceful shutdown via Ctrl+C
    tokio::select! {
        res = server_task => {
            res??;
        }
        _ = signal::ctrl_c() => {
            eprintln!("Shutting down...");
        }
    }
    Ok(())
}

fn print_config() -> Result<()> {
    let cfg = Cli::default();
    let json = serde_json::to_string_pretty(&cfg)?;
    println!("{json}");
    Ok(())
}

backend-rust/crates/db/Cargo.toml Normal file

@@ -0,0 +1,10 @@
[package]
name = "db"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }
tracing = { workspace = true }
api = { path = "../api" }

backend-rust/crates/db/src/lib.rs Normal file

@@ -0,0 +1,44 @@
use api::config::AppSettings;
use anyhow::{Context, Result};
use sqlx::migrate::Migrator;
use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
use sqlx::{Pool, Sqlite, SqlitePool};
use std::str::FromStr;
use std::time::Duration;
use tracing::info;
// Embed migrations from the workspace-level migrations directory.
// crates/db is two levels below backend-rust where migrations/ resides.
pub const MIGRATOR: Migrator = sqlx::migrate!("../../migrations");
pub async fn initialize_db(app_settings: &AppSettings) -> Result<Pool<Sqlite>> {
    app_settings
        .ensure_default_directory()
        .context("Failed to ensure default directory for database")?;

    let options = SqliteConnectOptions::from_str(&app_settings.database_url())?
        .create_if_missing(true)
        .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
        .foreign_keys(true);

    let pool = SqlitePoolOptions::new()
        .max_connections(20)
        .min_connections(5)
        .acquire_timeout(Duration::from_secs(30))
        .idle_timeout(Duration::from_secs(600))
        .connect_with(options)
        .await?;

    MIGRATOR
        .run(&pool)
        .await
        .with_context(|| "Database migrations failed")?;
    info!("Database migrations completed successfully");

    Ok(pool)
}

pub async fn create_pool(opts: SqliteConnectOptions) -> Result<SqlitePool> {
    let pool = SqlitePool::connect_with(opts).await?;
    Ok(pool)
}

backend-rust/crates/server/Cargo.toml Normal file

@@ -0,0 +1,23 @@
[package]
name = "server"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
axum = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }
dotenv = { workspace = true }
once_cell = { workspace = true }
api = { path = "../api" }
db = { path = "../db" }
http = "1.3.1"
[features]
default = []

backend-rust/crates/server/src/lib.rs Normal file

@@ -0,0 +1,63 @@
use axum::{routing::get, Json, Router};
use std::{net::SocketAddr, sync::Arc};
use tokio::net::TcpListener;
use tracing::{info, level_filters::LevelFilter};
use tracing_subscriber::EnvFilter;
use api::services::{DefaultHealthService, HealthService};
use api::types::Health;
use api::config::AppSettings;
pub struct AppState {
    pub health_service: Arc<dyn HealthService>,
}

pub async fn build_router(state: Arc<AppState>) -> Router {
    Router::new().route(
        "/health",
        get({
            let state = state.clone();
            move || health_handler(state.clone())
        }),
    )
}

async fn health_handler(state: Arc<AppState>) -> Json<Health> {
    let res = state.health_service.health().await;
    Json(res)
}

pub async fn start_server(addr: SocketAddr) -> anyhow::Result<()> {
    init_tracing();

    // Load application settings and initialize the database pool (sqlite).
    let app_settings = AppSettings::get_app_settings();
    let pool = db::initialize_db(&app_settings).await?;

    let state = Arc::new(AppState {
        health_service: Arc::new(DefaultHealthService),
    });

    // Base daemon router
    let app = build_router(state).await
        // Attach API under /api and provide DB state
        .nest("/api", api::api::routes::routes().with_state(pool.clone()));

    let listener = TcpListener::bind(addr).await?;
    info!("HTTP server listening on http://{}", addr);
    axum::serve(listener, app).await?;
    Ok(())
}

fn init_tracing() {
    let env_filter = EnvFilter::try_from_default_env()
        .or_else(|_| EnvFilter::try_new("info"))
        .unwrap()
        .add_directive(LevelFilter::INFO.into());
    tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_target(true)
        .compact()
        .init();
}

backend-rust/crates/server/tests/ (health check integration test)

@@ -0,0 +1,22 @@
use axum::{body::Body, http::{Request, StatusCode}, Router};
use server::{build_router, AppState};
use api::services::DefaultHealthService;
use std::sync::Arc;
// `oneshot` comes from tower's ServiceExt; assumes `tower` as a dev-dependency
// (axum-test from the workspace dev-dependencies would work as well).
use tower::ServiceExt;

#[tokio::test]
async fn health_ok() {
    let state = Arc::new(AppState {
        health_service: Arc::new(DefaultHealthService),
    });
    let app: Router = build_router(state).await;

    let req = Request::builder()
        .uri("/health")
        .body(Body::empty())
        .unwrap();

    // Drive the request through the router and assert a successful response.
    let res = app.oneshot(req).await.unwrap();
    assert_eq!(res.status(), StatusCode::OK);
}

31
backend-rust/example.env Normal file

@@ -0,0 +1,31 @@
# URL for the Ollama service
OLLAMA_HOST=http://localhost:11434
# Interval for scheduled news fetching in hours
CRON_HOURS=1
# Minimum interval for scheduled news fetching in hours
MIN_CRON_HOURS=0.5
# Cooldown period in minutes between manual syncs
SYNC_COOLDOWN_MINUTES=30
# LLM model to use for summarization
# (alternatives listed below; keep only one LLM_MODEL line active)
LLM_MODEL=qwen2:7b-instruct-q4_K_M
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
LLM_MODEL=mistral-nemo:12b
# Timeout in seconds for LLM requests
LLM_TIMEOUT_SECONDS=180
# Timeout in seconds for Ollama API requests
OLLAMA_API_TIMEOUT_SECONDS=10
# Timeout in seconds for article fetching
ARTICLE_FETCH_TIMEOUT=30
# Maximum length of article content to process
MAX_ARTICLE_LENGTH=5000
# SQLite database connection string
DB_NAME=owlynews.sqlite3

backend-rust/migrations/ (initial schema, down)

@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS meta;
DROP TABLE IF EXISTS settings;
DROP TABLE IF EXISTS feeds;
DROP INDEX IF EXISTS idx_news_published;
DROP TABLE IF EXISTS news;

backend-rust/migrations/ (initial schema, up)

@@ -0,0 +1,38 @@
-- Initial database schema for Owly News Summariser
-- News table to store articles
CREATE TABLE IF NOT EXISTS news
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
summary TEXT,
url TEXT NOT NULL,
published TIMESTAMP NOT NULL,
country TEXT NOT NULL,
created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
-- Index for faster queries on published date
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);
-- Feeds table to store RSS feed sources
CREATE TABLE IF NOT EXISTS feeds
(
id INTEGER PRIMARY KEY,
country TEXT,
url TEXT UNIQUE NOT NULL
);
-- Settings table for application configuration
CREATE TABLE IF NOT EXISTS settings
(
key TEXT PRIMARY KEY,
val TEXT NOT NULL
);
-- Meta table for application metadata
CREATE TABLE IF NOT EXISTS meta
(
key TEXT PRIMARY KEY,
val TEXT NOT NULL
);

backend-rust/migrations/ (add category column, down)

@@ -0,0 +1,18 @@
CREATE TABLE news_backup
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
summary TEXT,
url TEXT NOT NULL,
published TIMESTAMP NOT NULL,
country TEXT NOT NULL,
created_at INTEGER DEFAULT (strftime('%s', 'now'))
);
INSERT INTO news_backup
SELECT id, title, summary, url, published, country, created_at
FROM news;
DROP TABLE news;
ALTER TABLE news_backup
RENAME TO news;
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);

backend-rust/migrations/ (add category column, up)

@@ -0,0 +1,3 @@
-- Add category field to news table
ALTER TABLE news
ADD COLUMN category TEXT;

backend-rust/migrations/ (articles table, down)

@@ -0,0 +1,7 @@
-- Drop articles table and its indexes
DROP INDEX IF EXISTS idx_articles_read_at;
DROP INDEX IF EXISTS idx_articles_source_type;
DROP INDEX IF EXISTS idx_articles_processing_status;
DROP INDEX IF EXISTS idx_articles_added_at;
DROP INDEX IF EXISTS idx_articles_published_at;
DROP TABLE IF EXISTS articles;

backend-rust/migrations/ (articles table, up)

@@ -0,0 +1,27 @@
-- Create enhanced articles table to replace news table structure
CREATE TABLE IF NOT EXISTS articles
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
url TEXT NOT NULL,
source_type TEXT NOT NULL DEFAULT 'rss', -- 'rss', 'manual'
rss_content TEXT, -- RSS description/excerpt
full_content TEXT, -- Scraped full content
summary TEXT, -- AI-generated summary
processing_status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'processing', 'completed', 'failed'
published_at TIMESTAMP NOT NULL,
added_at TIMESTAMP NOT NULL DEFAULT (datetime('now')),
read_at TIMESTAMP,
read_count INTEGER NOT NULL DEFAULT 0,
reading_time INTEGER, -- in seconds
ai_enabled BOOLEAN NOT NULL DEFAULT 1,
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')),
updated_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create indexes for performance
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles (published_at);
CREATE INDEX IF NOT EXISTS idx_articles_added_at ON articles (added_at);
CREATE INDEX IF NOT EXISTS idx_articles_processing_status ON articles (processing_status);
CREATE INDEX IF NOT EXISTS idx_articles_source_type ON articles (source_type);
CREATE INDEX IF NOT EXISTS idx_articles_read_at ON articles (read_at);
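The processing_status column and its index are what a background worker can poll; as a rough sketch (not part of this migration file; the batch size and claim/update flow are assumptions), the schema supports a queue pattern like:

-- Fetch the oldest pending articles (sketch; LIMIT is an assumption)
SELECT id, url, rss_content
FROM articles
WHERE processing_status = 'pending'
ORDER BY added_at
LIMIT 10;
-- Claim one for processing
UPDATE articles
SET processing_status = 'processing', updated_at = datetime('now')
WHERE id = ?;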

View File

@@ -0,0 +1,9 @@
-- Drop tag system tables and indexes
DROP INDEX IF EXISTS idx_article_tags_ai_generated;
DROP INDEX IF EXISTS idx_article_tags_tag_id;
DROP INDEX IF EXISTS idx_article_tags_article_id;
DROP INDEX IF EXISTS idx_tags_usage_count;
DROP INDEX IF EXISTS idx_tags_parent_id;
DROP INDEX IF EXISTS idx_tags_category;
DROP TABLE IF EXISTS article_tags;
DROP TABLE IF EXISTS tags;

View File

@@ -0,0 +1,31 @@
-- Create tags table with hierarchical support
CREATE TABLE IF NOT EXISTS tags
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
category TEXT NOT NULL, -- 'geographic', 'content', 'source', 'custom'
description TEXT,
color TEXT, -- Hex color for UI display
usage_count INTEGER NOT NULL DEFAULT 0,
parent_id INTEGER REFERENCES tags (id), -- For hierarchical tags (e.g., Country -> Region -> City)
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create article_tags junction table
CREATE TABLE IF NOT EXISTS article_tags
(
article_id INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags (id) ON DELETE CASCADE,
confidence_score REAL DEFAULT 1.0, -- AI confidence (0.0-1.0)
ai_generated BOOLEAN NOT NULL DEFAULT 0,
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')),
PRIMARY KEY (article_id, tag_id)
);
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_tags_category ON tags (category);
CREATE INDEX IF NOT EXISTS idx_tags_parent_id ON tags (parent_id);
CREATE INDEX IF NOT EXISTS idx_tags_usage_count ON tags (usage_count DESC);
CREATE INDEX IF NOT EXISTS idx_article_tags_article_id ON article_tags (article_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_tag_id ON article_tags (tag_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_ai_generated ON article_tags (ai_generated);
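Because parent_id points back into tags, a geographic chain such as country/region/city can be resolved with a recursive CTE; a sketch with invented tag names (not part of this migration file):

-- Hypothetical hierarchy: Germany -> Bavaria -> Munich
INSERT INTO tags (name, category) VALUES ('Germany', 'geographic');
INSERT INTO tags (name, category, parent_id)
SELECT 'Bavaria', 'geographic', id FROM tags WHERE name = 'Germany';
INSERT INTO tags (name, category, parent_id)
SELECT 'Munich', 'geographic', id FROM tags WHERE name = 'Bavaria';
-- Walk from a leaf tag up to its root
WITH RECURSIVE chain(id, name, parent_id) AS (
    SELECT id, name, parent_id FROM tags WHERE name = 'Munich'
    UNION ALL
    SELECT t.id, t.name, t.parent_id FROM tags t JOIN chain c ON t.id = c.parent_id
)
SELECT name FROM chain;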

View File

@@ -0,0 +1,11 @@
-- Drop analytics system tables and indexes
DROP INDEX IF EXISTS idx_legacy_migration_old_filter_type;
DROP INDEX IF EXISTS idx_share_templates_format;
DROP INDEX IF EXISTS idx_filter_presets_user_id;
DROP INDEX IF EXISTS idx_reading_stats_read_at;
DROP INDEX IF EXISTS idx_reading_stats_article_id;
DROP INDEX IF EXISTS idx_reading_stats_user_id;
DROP TABLE IF EXISTS legacy_migration;
DROP TABLE IF EXISTS share_templates;
DROP TABLE IF EXISTS filter_presets;
DROP TABLE IF EXISTS reading_stats;

View File

@@ -0,0 +1,50 @@
-- Create reading statistics table
CREATE TABLE IF NOT EXISTS reading_stats
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER DEFAULT 1, -- For future multi-user support
article_id INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
read_at TIMESTAMP NOT NULL,
reading_time INTEGER, -- in seconds
completion_rate REAL DEFAULT 1.0, -- 0.0-1.0, how much of the article was read
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create filter presets table
CREATE TABLE IF NOT EXISTS filter_presets
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
filter_criteria TEXT NOT NULL, -- JSON string of filter parameters
user_id INTEGER DEFAULT 1, -- For future multi-user support
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create share templates table
CREATE TABLE IF NOT EXISTS share_templates
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
format TEXT NOT NULL, -- 'text', 'markdown', 'html', 'json'
template_content TEXT NOT NULL,
is_default BOOLEAN NOT NULL DEFAULT 0,
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create legacy migration tracking table
CREATE TABLE IF NOT EXISTS legacy_migration
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
old_filter_type TEXT NOT NULL, -- 'country', 'category', etc.
old_value TEXT NOT NULL,
new_tag_ids TEXT, -- JSON array of tag IDs
migrated_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_reading_stats_user_id ON reading_stats (user_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_article_id ON reading_stats (article_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_read_at ON reading_stats (read_at);
CREATE INDEX IF NOT EXISTS idx_filter_presets_user_id ON filter_presets (user_id);
CREATE INDEX IF NOT EXISTS idx_share_templates_format ON share_templates (format);
CREATE INDEX IF NOT EXISTS idx_legacy_migration_old_filter_type ON legacy_migration (old_filter_type);
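With these indexes in place, the per-day aggregates the analytics feature needs stay cheap; one plausible query (not part of this migration file; the 30-day window is an assumption):

SELECT date(read_at)        AS day,
       COUNT(*)             AS articles_read,
       AVG(reading_time)    AS avg_seconds,
       AVG(completion_rate) AS avg_completion
FROM reading_stats
WHERE read_at >= datetime('now', '-30 days')
GROUP BY date(read_at)
ORDER BY day DESC;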

View File

@@ -0,0 +1,18 @@
-- Remove enhanced settings columns and indexes
DROP INDEX IF EXISTS idx_settings_user_id;
DROP INDEX IF EXISTS idx_settings_category;
-- Note: SQLite doesn't support DROP COLUMN, so we recreate the table
CREATE TABLE settings_backup AS
SELECT key, val
FROM settings;
DROP TABLE settings;
CREATE TABLE settings
(
key TEXT PRIMARY KEY,
val TEXT NOT NULL
);
INSERT INTO settings
SELECT key, val
FROM settings_backup;
DROP TABLE settings_backup;

View File

@@ -0,0 +1,74 @@
-- Enhance settings table to support more structured configuration
ALTER TABLE settings
ADD COLUMN category TEXT DEFAULT 'general';
ALTER TABLE settings
ADD COLUMN user_id INTEGER DEFAULT 1;
ALTER TABLE settings
ADD COLUMN updated_at TIMESTAMP DEFAULT (datetime('now'));
-- Create index for better performance
CREATE INDEX IF NOT EXISTS idx_settings_category ON settings (category);
CREATE INDEX IF NOT EXISTS idx_settings_user_id ON settings (user_id);
-- Insert default settings based on roadmap configuration
INSERT OR IGNORE INTO settings (key, val, category)
VALUES
-- Display settings
('default_view', 'compact', 'display'),
('articles_per_page', '50', 'display'),
('show_reading_time', '1', 'display'),
('show_word_count', '0', 'display'),
('highlight_unread', '1', 'display'),
('theme', 'auto', 'display'),
-- Analytics settings
('analytics_enabled', '1', 'analytics'),
('track_reading_time', '1', 'analytics'),
('track_scroll_position', '1', 'analytics'),
('retention_days', '365', 'analytics'),
('aggregate_older_data', '1', 'analytics'),
-- Filtering settings
('enable_smart_suggestions', '1', 'filtering'),
('max_recent_filters', '10', 'filtering'),
('auto_save_filters', '1', 'filtering'),
('default_sort', 'added_desc', 'filtering'),
('enable_geographic_hierarchy', '1', 'filtering'),
('auto_migrate_country_filters', '1', 'filtering'),
-- Sharing settings
('default_share_format', 'text', 'sharing'),
('include_summary', '1', 'sharing'),
('include_tags', '1', 'sharing'),
('include_source', '1', 'sharing'),
('copy_to_clipboard', '1', 'sharing'),
-- AI settings
('ai_enabled', '1', 'ai'),
('ai_provider', 'ollama', 'ai'),
('ai_timeout_seconds', '120', 'ai'),
('ai_summary_enabled', '1', 'ai'),
('ai_summary_temperature', '0.1', 'ai'),
('ai_summary_max_tokens', '1000', 'ai'),
('ai_tagging_enabled', '1', 'ai'),
('ai_tagging_temperature', '0.3', 'ai'),
('ai_tagging_max_tokens', '200', 'ai'),
('max_tags_per_article', '10', 'ai'),
('min_confidence_threshold', '0.7', 'ai'),
('enable_geographic_tagging', '1', 'ai'),
('enable_category_tagging', '1', 'ai'),
('geographic_hierarchy_levels', '3', 'ai'),
-- Scraping settings
('scraping_timeout_seconds', '30', 'scraping'),
('scraping_max_retries', '3', 'scraping'),
('max_content_length', '50000', 'scraping'),
('respect_robots_txt', '1', 'scraping'),
('rate_limit_delay_ms', '1000', 'scraping'),
-- Processing settings
('batch_size', '10', 'processing'),
('max_concurrent', '5', 'processing'),
('retry_attempts', '3', 'processing'),
('priority_manual', '1', 'processing'),
('auto_mark_read_on_view', '0', 'processing');
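Consumers of this table would filter on the new category column; a sketch of reading one category and upserting a single key (not part of this migration file; the upsert requires SQLite 3.24+):

SELECT key, val FROM settings WHERE category = 'ai' AND user_id = 1;
INSERT INTO settings (key, val, category)
VALUES ('ai_timeout_seconds', '180', 'ai')
ON CONFLICT(key) DO UPDATE SET val = excluded.val, updated_at = datetime('now');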

View File

@@ -0,0 +1,39 @@
-- Remove migrated data (this will remove all articles and tags created from migration)
-- WARNING: This will delete all migrated data
-- Remove legacy migration records
DELETE
FROM legacy_migration
WHERE old_filter_type IN ('country', 'category');
-- Remove article-tag associations for migrated data (non-AI generated)
DELETE
FROM article_tags
WHERE ai_generated = 0;
-- Remove migrated geographic tags (only those created from country data)
DELETE
FROM tags
WHERE tags.category = 'geographic'
AND EXISTS (SELECT 1 FROM news WHERE news.country = tags.name);
-- Remove migrated content tags (only those created from category data)
DELETE
FROM tags
WHERE tags.category = 'content'
AND EXISTS (SELECT 1 FROM news WHERE news.category = tags.name);
-- Remove migrated articles (only those that match news entries)
DELETE
FROM articles
WHERE EXISTS (SELECT 1
FROM news
WHERE news.url = articles.url
AND news.title = articles.title
AND articles.source_type = 'rss');
-- Reset tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
FROM article_tags
WHERE tag_id = tags.id);

View File

@@ -0,0 +1,84 @@
-- Migrate data from old news table to new articles table
INSERT INTO articles (title, url, summary, published_at, added_at, source_type, processing_status)
SELECT title,
url,
summary,
published,
datetime(created_at, 'unixepoch'),
'rss',
CASE
WHEN summary IS NOT NULL AND summary != '' THEN 'completed'
ELSE 'pending'
END
FROM news;
-- Create geographic tags from existing country data
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT country,
'geographic',
'Geographic location: ' || country,
COUNT(*)
FROM news
WHERE country IS NOT NULL
AND country != ''
GROUP BY country;
-- Link articles to their geographic tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
t.id,
0, -- Not AI generated, migrated from legacy data
1.0 -- Full confidence for existing data
FROM articles a
JOIN news n ON a.url = n.url AND a.title = n.title
JOIN tags t ON t.name = n.country AND t.category = 'geographic'
WHERE n.country IS NOT NULL
AND n.country != '';
-- Create category tags if category column exists in news table
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT n.category,
'content',
'Content category: ' || n.category,
COUNT(*)
FROM news n
WHERE n.category IS NOT NULL
AND n.category != ''
GROUP BY n.category;
-- Link articles to their category tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
t.id,
0, -- Not AI generated, migrated from legacy data
1.0 -- Full confidence for existing data
FROM articles a
JOIN news n ON a.url = n.url AND a.title = n.title
JOIN tags t ON t.name = n.category AND t.category = 'content'
WHERE n.category IS NOT NULL
AND n.category != '';
-- Record migration in legacy_migration table for countries
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'country',
n.country,
'[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT country FROM news WHERE country IS NOT NULL AND country != '') n
JOIN tags t ON t.name = n.country AND t.category = 'geographic'
GROUP BY n.country;
-- Record migration in legacy_migration table for categories (if they exist)
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'category',
n.category,
'[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT category FROM news WHERE category IS NOT NULL AND category != '') n
JOIN tags t ON t.name = n.category AND t.category = 'content'
GROUP BY n.category;
-- Update tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
FROM article_tags
WHERE tag_id = tags.id);
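A few sanity checks one might run after this migration (not part of the script itself):

-- Legacy and migrated row counts should line up
SELECT (SELECT COUNT(*) FROM news)     AS legacy_rows,
       (SELECT COUNT(*) FROM articles) AS migrated_rows;
-- Each distinct non-empty country should have become a geographic tag
SELECT COUNT(DISTINCT country) AS countries
FROM news WHERE country IS NOT NULL AND country != '';
SELECT COUNT(*) AS geo_tags FROM tags WHERE category = 'geographic';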

View File

@@ -0,0 +1,4 @@
-- Remove default sharing templates
DELETE
FROM share_templates
WHERE name IN ('Default Text', 'Markdown', 'Simple Text', 'HTML Email');

View File

@@ -0,0 +1,39 @@
-- Insert default sharing templates
INSERT INTO share_templates (name, format, template_content, is_default)
VALUES ('Default Text', 'text', '📰 {title}
{summary}
🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}
Shared via Owly News Summariser', 1),
('Markdown', 'markdown', '# {title}
{summary}
**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}
---
*Shared via Owly News Summariser*', 1),
('Simple Text', 'text', '{title}
{summary}
Source: {url}', 0),
('HTML Email', 'html', '<h2>{title}</h2>
<p>{summary}</p>
<p><strong>Tags:</strong> {tags}<br>
<strong>Location:</strong> {geographic_tags}<br>
<strong>Source:</strong> <a href="{url}">{url}</a><br>
<strong>Published:</strong> {published_at}</p>
<hr>
<small>Shared via Owly News Summariser</small>', 0);
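Placeholder substitution happens in the application, but purely as an illustration the 'Simple Text' template can be rendered in SQL with nested replace() calls (the article id is hypothetical):

SELECT replace(replace(replace(st.template_content,
           '{title}', a.title),
           '{summary}', COALESCE(a.summary, '')),
           '{url}', a.url) AS rendered
FROM share_templates st
JOIN articles a ON a.id = 1
WHERE st.name = 'Simple Text';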

View File

@@ -8,11 +8,11 @@ MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
-LLM_MODEL = os.getenv("LLM_MODEL", "mistral-nemo:12b")
LLM_MODEL = os.getenv("LLM_MODEL", "gemma2:9b")
LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
-MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 10_000))
MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 40_000))
frontend_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
@@ -21,7 +21,7 @@ frontend_path = os.path.join(
)
logging.basicConfig(
-level=logging.WARNING,
level=logging.DEBUG,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

View File

@@ -150,8 +150,6 @@ async def get_news(
where_conditions.append("published BETWEEN ? AND ?")
params.extend([from_ts, to_ts])
logger.info(f"Date range: {from_date} to {to_date} (UTC timestamps: {from_ts} to {to_ts})")
# Build the complete SQL query
base_sql = """
SELECT id, title, summary, url, published, country, created_at
@@ -163,27 +161,13 @@ async def get_news(
else:
sql = base_sql
sql += " ORDER BY published DESC LIMIT 1000"
# Log query info
if all_countries and all_dates:
logger.info("Querying ALL news articles (no filters)")
elif all_countries:
logger.info(f"Querying news from ALL countries with date filter")
elif all_dates:
logger.info(f"Querying ALL dates for countries: {country}")
else:
logger.info(f"Querying news: countries={country}, timezone={timezone_name}")
logger.info(f"SQL: {sql}")
logger.info(f"Parameters: {params}")
sql += " ORDER BY published DESC"
# Execute the query
db.execute(sql, params)
rows = db.fetchall()
result = [dict(row) for row in rows]
logger.info(f"Found {len(result)} news articles")
return result
except ValueError as e:

View File

@@ -119,12 +119,11 @@ class NewsFetcher:
except Exception as e:
logger.warning(
f"❌ Error fetching article content from {url}: {
type(e).__name__}: {e}")
return ""
@staticmethod
def build_prompt(
-url: str,
title: str = "",
summary: str = "",
content: str = "") -> str:
@@ -132,14 +131,13 @@ class NewsFetcher:
Generate a prompt for the LLM to summarize an article.
Args:
-url: Public URL of the article to summarize
title: Article title from RSS feed (optional)
summary: Article summary from RSS feed (optional)
content: Extracted article content (optional)
Returns:
A formatted prompt string that instructs the LLM to generate
-a JSON response with title and summaries in German and English
a JSON response with title, summary and tags in German
"""
context_info = []
if title:
@@ -148,28 +146,43 @@ class NewsFetcher:
context_info.append(f"RSS-Beschreibung: {summary}")
if content:
content_preview = content[:500] + \
"..." if len(content) > 500 else content
"..." if len(content) > 500 else content
context_info.append(f"Artikel-Inhalt: {content_preview}")
context = "\n".join(
context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."
return (
"### Aufgabe\n"
f"Du sollst eine Nachricht basierend auf der URL und den verfügbaren Informationen zusammenfassen.\n"
f"URL: {url}\n"
f"Verfügbare Informationen:\n{context}\n\n"
"### Vorliegende Informationen\n"
f"{context}\n\n"
"### Längenbegrenzungen\n"
"title: Format \"ORT: Titel\", max 100 Zeichen\n"
"location: nur der ORT-Teil, max 40 Zeichen\n"
"summary: 100160 Wörter\n"
"tags: bis zu 6 Schlüsselwörter, durch Komma getrennt, alles Kleinbuchstaben.\n\n"
"### Regeln\n"
"1. Nutze VORRANGIG den Artikel-Inhalt falls verfügbar, ergänze mit RSS-Informationen\n"
"2. Falls kein Artikel-Inhalt verfügbar ist, nutze RSS-Titel und -Beschreibung\n"
"3. Falls keine ausreichenden Informationen vorliegen, erstelle eine plausible Zusammenfassung basierend auf der URL\n"
"4. Gib ausschließlich **gültiges minifiziertes JSON** zurück kein Markdown, keine Kommentare\n"
"5. Struktur: {\"title\":\"\",\"summary\":\"\"}\n"
"6. title: Aussagekräftiger deutscher Titel (max 100 Zeichen)\n"
"7. summary: Deutsche Zusammenfassung (zwischen 100 und 160 Wörter)\n"
"8. Kein Text vor oder nach dem JSON\n\n"
"1. Nutze ausschließlich Informationen, die im bereitgestellten Material eindeutig vorkommen. Externes Wissen ist untersagt.\n"
"2. Liegt sowohl Artikel-Text als auch RSS-Metadaten vor, hat der Artikel-Text Vorrang; verwende RSS nur ergänzend.\n"
"3. Liegt nur RSS-Titel und/oder -Beschreibung vor, stütze dich ausschließlich darauf.\n"
"4. Sind die Informationen unzureichend, gib exakt {\"location\":\"\",\"title\":\"\",\"summary\":\"\",\"tags\":\"\"} zurück.\n"
"5. Gib nur gültiges, minifiziertes JSON zurück keine Zeilenumbrüche, kein Markdown, keine Kommentare.\n"
"6. Verwende keine hypothetischen Formulierungen (\"könnte\", \"möglicherweise\" etc.).\n"
"7. Wörtliche Zitate dürfen höchstens 15 % des Summary-Texts ausmachen.\n"
"8. Kein Text vor oder nach dem JSON.\n\n"
"### Ausgabe\n"
"Jetzt antworte mit dem JSON:"
"Antworte jetzt ausschließlich mit dem JSON:\n"
)
@staticmethod
def build_system_prompt():
return (
"Du bist ein hochpräziser JSON-Summarizer und Experte für die Zusammenfassung von Artikeln.\n\n"
"### Vorgehen\n"
"Schritt 1: Identifiziere Hauptthema und Zweck.\n"
"Schritt 2: Extrahiere die wichtigsten Fakten und Ergebnisse.\n"
"Schritt 3: Erkenne die zentralen Argumente und Standpunkte.\n"
"Schritt 4: Ordne die Informationen nach Wichtigkeit.\n"
"Schritt 5: Erstelle eine prägnante, klare und sachliche Zusammenfassung.\n\n"
)
@staticmethod
@@ -193,26 +206,55 @@ class NewsFetcher:
A dictionary containing the article title and summaries in German and English,
or None if summarization failed
"""
logger.debug("[AI] Fetching article content from: " + url)
article_content = await NewsFetcher.fetch_article_content(client, url)
if not article_content:
logger.warning(
f"⚠️ Could not fetch article content, using RSS data only")
-prompt = NewsFetcher.build_prompt(
-    url, title, summary, article_content)
prompt = NewsFetcher.build_prompt(title, summary, article_content)
system_prompt = NewsFetcher.build_system_prompt()
payload = {
"model": LLM_MODEL,
"prompt": prompt,
"system": system_prompt,
"stream": False,
"temperature": 0.1,
"format": "json",
"format": {
"type": "object",
"properties": {
"title": {
"type": "string"
},
"location": {
"type": "string"
},
"summary": {
"type": "string"
},
"tags": {
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"title",
"summary",
"tags"
]
},
"options": {
"num_gpu": 1, # Force GPU usage
"num_ctx": 128_000, # Context size
"num_ctx": 8192, # Context size
}
}
logger.debug("[AI] Running summary generation...")
try:
response = await client.post(
f"{OLLAMA_HOST}/api/generate",
@@ -224,6 +266,8 @@ class NewsFetcher:
result = response.json()
llm_response = result["response"]
logger.debug("[AI] " + llm_response)
if isinstance(llm_response, str):
summary_data = json.loads(llm_response)
else:
@@ -263,7 +307,7 @@ class NewsFetcher:
except Exception as e:
logger.error(
f"❌ Unexpected error summarizing {url}: {
type(e).__name__}: {e}")
return None
@staticmethod
@@ -300,7 +344,7 @@ class NewsFetcher:
except Exception as e:
logger.error(
f"❌ Critical error during harvest: {
type(e).__name__}: {e}")
raise
@staticmethod
@@ -327,18 +371,18 @@ class NewsFetcher:
if hasattr(feed_data, 'bozo') and feed_data.bozo:
logger.warning(
f"⚠️ Feed has parsing issues: {
feed_row['url']}")
feed_row['url']}")
if hasattr(feed_data, 'bozo_exception'):
logger.warning(
f"⚠️ Feed exception: {
feed_data.bozo_exception}")
feed_data.bozo_exception}")
total_entries = len(feed_data.entries)
if total_entries == 0:
logger.warning(
f"⚠️ No entries found in feed: {
feed_row['url']}")
feed_row['url']}")
return stats
for i, entry in enumerate(feed_data.entries, 1):
@@ -388,8 +432,6 @@ class NewsFetcher:
summary=rss_summary
)
-logger.info(summary)
if not summary:
logger.warning(
f"❌ Failed to get summary for article {i}: {article_url}")
@@ -403,8 +445,9 @@ class NewsFetcher:
cursor.execute(
"""
INSERT
-OR IGNORE INTO news
-(title, summary, url, published, country)
OR IGNORE
INTO news
(title, summary, url, published, country)
VALUES (?, ?, ?, ?, ?)
""",
(
@@ -429,7 +472,7 @@ class NewsFetcher:
except Exception as e:
logger.error(
f"❌ Error processing feed {
feed_row['url']}: {
type(e).__name__}: {e}")
return stats

View File

@@ -11,10 +11,12 @@ MIN_CRON_HOURS=0.5
SYNC_COOLDOWN_MINUTES=30
# LLM model to use for summarization
LLM_MODEL=qwen2:7b-instruct-q4_K_M
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
LLM_MODEL=mistral-nemo:12b
LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S
LLM_MODEL=qwen2:7b-instruct-q4_K_M # ca 7-9GB (typisch 8GB)
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0 # ca 6-8GB (langer kontext)
LLM_MODEL=mistral-nemo:12b # ca 16-24+GB
LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S # ca 22GB
LLM_MODEL=yarn-mistral:7b-64k-q4_K_M # ca 11GB
LLM_MODEL=gemma2:9b # ca 8GB
# Timeout in seconds for LLM requests
LLM_TIMEOUT_SECONDS=180

Binary file not shown.

View File

@@ -1,8 +0,0 @@
import { defineConfig } from 'cypress'
export default defineConfig({
e2e: {
specPattern: 'cypress/e2e/**/*.{cy,spec}.{js,jsx,ts,tsx}',
baseUrl: 'http://localhost:4173',
},
})

View File

@@ -1,5 +1,5 @@
{
"name": "owly-news-summariser",
"name": "owly-news",
"version": "0.0.1",
"private": true,
"license": "PolyForm-Noncommercial-1.0.0",

View File

@@ -14,75 +14,77 @@
<!-- Articles Grid -->
<div v-else class="grid gap-4 sm:gap-6 md:grid-cols-2 xl:grid-cols-3">
<article
v-for="article in news.articles"
:key="article.id"
class="flex flex-col h-full bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-200 dark:border-gray-700 hover:shadow-md dark:hover:shadow-lg dark:hover:shadow-gray-800/50 transition-all duration-200 overflow-hidden group"
>
<!-- Article Header -->
<div class="flex-1 p-4 sm:p-6">
<div class="flex items-start justify-between mb-3">
<template v-for="article in news.articles"
:key="article.id">
<article
v-if="isValidArticleContent(article)"
class="flex flex-col h-full bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-200 dark:border-gray-700 hover:shadow-md dark:hover:shadow-lg dark:hover:shadow-gray-800/50 transition-all duration-200 overflow-hidden group"
>
<!-- Article Header -->
<div class="flex-1 p-4 sm:p-6">
<div class="flex items-start justify-between mb-3">
<span
class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-200">
class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-200">
{{ article.country }}
</span>
<time
  :datetime="new Date(article.published * 1000).toISOString()"
-  :title="new Date(article.published * 1000).toLocaleString(userLocale.value, {
  :title="new Date(article.published * 1000).toLocaleString(userLocale, {
dateStyle: 'full',
timeStyle: 'long'
})"
class="text-xs text-gray-500 flex-shrink-0 ml-2 cursor-help hover:text-green-600 dark:hover:text-green-400 transition-colors relative group"
>
{{ formatDate(article.published) }}
</time>
class="text-xs text-gray-500 flex-shrink-0 ml-2 cursor-help hover:text-green-600 dark:hover:text-green-400 transition-colors relative group"
>
{{ formatDate(article.published) }}
</time>
</div>
<!-- Title -->
<h3
  class="text-base sm:text-lg font-semibold text-gray-900 dark:text-white mb-3 line-clamp-2 group-hover:text-green-600 dark:group-hover:text-green-400 transition-colors">
  <a :href="article.url" target="_blank" rel="noopener noreferrer">
    {{ article.title }}
  </a>
</h3>
<!-- Summary -->
<p
  class="text-sm sm:text-base text-gray-700 dark:text-gray-300 line-clamp-5 leading-relaxed">
  {{ article.summary }}
</p>
</div>
<!-- Article Footer -->
<div
  class="flex justify-between items-center gap-4 p-4 sm:p-6">
  <button
    @click="openModal(article)"
    class="flex-1 inline-flex items-center justify-center cursor-pointer px-4 py-2 text-sm font-medium rounded-lg bg-green-100 text-green-700 hover:bg-green-200 dark:bg-green-900/30 dark:text-green-400 dark:hover:bg-green-900/50 transition-colors"
  >
    Full summary
    <svg class="ml-1 w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
            d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"/>
      <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
            d="M2.458 12C3.732 7.943 7.523 5 12 5c4.478 0 8.268 2.943 9.542 7-1.274 4.057-5.064 7-9.542 7-4.477 0-8.268-2.943-9.542-7z"/>
    </svg>
  </button>
  <a
    :href="article.url"
    target="_blank"
    rel="noopener noreferrer"
    class="flex-1 inline-flex items-center justify-center px-4 py-2 text-sm font-medium rounded-lg bg-green-100 text-green-700 hover:bg-green-200 dark:bg-green-900/30 dark:text-green-400 dark:hover:bg-green-900/50 transition-colors"
  >
    Full article
    <svg class="ml-1 w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
      <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
            d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14"/>
    </svg>
  </a>
</div>
</article>
</template>
</div>
<!-- Loading State & Load More Trigger -->
@@ -100,9 +102,9 @@
<!-- Article Modal -->
<ArticleModal
:is-open="isModalOpen"
:article="selectedArticle"
@close="closeModal"
:is-open="isModalOpen"
:article="selectedArticle"
@close="closeModal"
/>
</div>
</template>
@@ -129,17 +131,48 @@ const loadMoreArticles = async () => {
}
};
interface Article {
id: number;
title: string;
summary: string;
url: string;
published: number;
country: string;
created_at: number;
}
const INVALID_MARKERS = ['---', '...', '…', 'Title', 'Summary', 'Titel', 'Zusammenfassung'] as const;
const REQUIRED_TEXT_FIELDS = ['title', 'summary', 'url'] as const;
const isValidArticleContent = (article: Article): boolean => {
const hasEmptyRequiredFields = REQUIRED_TEXT_FIELDS.some(
field => article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].length === 0
);
if (hasEmptyRequiredFields) {
return false;
}
const hasInvalidMarkers = REQUIRED_TEXT_FIELDS.some(field =>
INVALID_MARKERS.some(marker =>
article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].includes(marker)
)
);
return !hasInvalidMarkers;
};
const observer = ref<IntersectionObserver | null>(null);
const loadMoreTrigger = ref<HTMLElement | null>(null);
onMounted(() => {
observer.value = new IntersectionObserver(
(entries) => {
if (entries[0].isIntersecting) {
loadMoreArticles();
}
},
{threshold: 0.5}
);
if (loadMoreTrigger.value) {

View File

@@ -6470,9 +6470,9 @@ __metadata:
languageName: node
linkType: hard
"owly-news-summariser@workspace:.":
"owly-news@workspace:.":
version: 0.0.0-use.local
resolution: "owly-news-summariser@workspace:."
resolution: "owly-news@workspace:."
dependencies:
"@tailwindcss/vite": "npm:^4.1.11"
"@tsconfig/node22": "npm:^22.0.2"