Compare commits
37 Commits
main...rewrite-ru
SHA1:
57a7b42b9d
d37daf02f6
16167d18ff
7c6724800f
af304266a4
815e3b22fd
e8e61faf61
c19813cbe2
cf163082b2
011b256662
0a97a57c76
338b3ac7c1
13fbac5009
9b805e891a
78073d27d7
c3b0c87bfa
0aa8d9fa3a
cbbd0948e6
3a5b0d8f4b
0ce916c654
f853213d15
300845c655
d90c618ee3
e7a97206a9
c2adfa711d
b2d82892ef
0f1632ad65
7b114a6145
4edb2b2179
aa520efb82
f22259b863
a30f8467bc
79e4d7f1de
37ebf45d82
bc1735448a
59b19a22ff
86b5f83140
.gitignore (vendored): 3 changed lines
@@ -34,10 +34,11 @@ build/
 logs/
 *.log

-# Database files
+# Database files (now includes the specific dev database)
 *.sqlite
 *.sqlite3
 *.db
+owlynews.sqlite3*

 # Dependency directories
 node_modules/
README.md: 238 changed lines
@@ -1,105 +1,67 @@
-# Owly News Summariser
+# Owly News

-Owly News Summariser is a web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.
+Owly News is a modern web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.

 ## Features

 - Fetches news from configurable RSS feeds
 - Automatically summarizes articles using Ollama LLM
-- Filters news by country
+- **AI-powered intelligent tagging** with geographic, category, and source tags
+- **Advanced multi-criteria filtering** with hierarchical tag support
 - Progressive Web App (PWA) support for offline access
 - Scheduled background updates
+- High-performance Rust backend for optimal resource usage
+- Modern Vue.js frontend with TypeScript support
+- **Comprehensive analytics** and reading statistics
+- **Flexible sharing system** with multiple format options

 ## Project Structure

-The project consists of two main components:
+The project consists of multiple components:

-- **Backend**: A FastAPI application that fetches and processes news feeds, summarizes articles, and provides API endpoints
-- **Frontend**: A Vue.js application that displays the news and provides a user interface for managing feeds
+- **Backend (Rust)**: Primary backend written in Rust using the Axum framework for high performance (`backend-rust/`)
+- **Backend (Python)**: Legacy FastAPI backend (`backend/`)
+- **Frontend**: Modern Vue.js 3 application with TypeScript and Tailwind CSS (`frontend/`)

 ## Prerequisites

-- Python 3.8+ for the backend
-- Node.js 16+ and Yarn for the frontend
+### For Rust Backend (Recommended)
+- Rust 1.88.0+
+- [Ollama](https://ollama.ai/) for article summarization and tagging
+- SQLite (handled automatically by SQLx)
+
+### For Python Backend (Legacy)
+- Python 3.8+
+- [Ollama](https://ollama.ai/) for article summarization

-## Installing Yarn
-
-Yarn is a package manager for JavaScript that's required for the frontend. Here's how to install it:
-
-### Using npm (recommended)
-
-If you already have Node.js installed, the easiest way to install Yarn is via npm:
-
-```bash
-npm install -g yarn
-```
-
-### Platform-specific installations
-
-#### Windows
-
-- **Using Chocolatey**: `choco install yarn`
-- **Using Scoop**: `scoop install yarn`
-- **Manual installation**: Download and run the [installer](https://classic.yarnpkg.com/latest.msi)
-
-#### macOS
-
-- **Using Homebrew**: `brew install yarn`
-- **Using MacPorts**: `sudo port install yarn`
-
-#### Linux
-
-- **Debian/Ubuntu**:
-  ```bash
-  curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
-  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
-  sudo apt update && sudo apt install yarn
-  ```
-
-- **CentOS/Fedora/RHEL**:
-  ```bash
-  curl --silent --location https://dl.yarnpkg.com/rpm/yarn.repo | sudo tee /etc/yum.repos.d/yarn.repo
-  sudo yum install yarn
-  ```
-
-- **Arch Linux**: `pacman -S yarn`
-
-After installation, verify Yarn is installed correctly:
-
-```bash
-yarn --version
-```
+### For Frontend
+- Node.js 22+ and npm
+- Modern web browser with PWA support

 ## Setup

-### Backend Setup
+### Rust Backend Setup (Recommended)

-1. Navigate to the backend directory:
+1. Navigate to the Rust backend directory:
    ```bash
-   cd backend
+   cd backend-rust
    ```

-2. Create a virtual environment:
-   ```bash
-   python -m venv venv
-   source venv/bin/activate  # On Windows: venv\Scripts\activate
-   ```
-
-3. Install dependencies:
-   ```bash
-   pip install -r requirements.txt
-   ```
-
-4. Create a `.env` file based on the example:
+2. Create a `.env` file based on the example:
    ```bash
    cp example.env .env
    ```

-5. Customize the `.env` file as needed:
-   - `OLLAMA_HOST`: URL for the Ollama service (default: http://localhost:11434)
-   - `CRON_HOURS`: Interval for scheduled news fetching (default: 1)
+3. Customize the `.env` file as needed:
+   - `DATABASE_URL`: SQLite database connection string
+   - `OLLAMA_BASE_URL`: URL for the Ollama service (default: http://localhost:11434)
+   - Other configuration options as documented in the example file
+
+4. Run database migrations:
+   ```bash
+   cargo install sqlx-cli
+   sqlx migrate run
+   ```

 ### Frontend Setup

@@ -110,29 +72,24 @@ yarn --version

 2. Install dependencies:
    ```bash
-   yarn
+   npm install
    ```

 ## Running the Application

-### Running the Backend
+### Running the Rust Backend

-1. Navigate to the backend directory:
+1. Navigate to the Rust backend directory:
    ```bash
-   cd backend
+   cd backend-rust
    ```

-2. Activate the virtual environment:
+2. Start the backend server:
    ```bash
-   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   cargo run
    ```

-3. Start the backend server:
-   ```bash
-   uvicorn app.main:app --reload
-   ```
-
-The backend will be available at http://localhost:8000
+The backend will be available at http://localhost:3000

 ### Running the Frontend

@@ -143,21 +100,52 @@ yarn --version

 2. Start the development server:
    ```bash
-   yarn dev:watch
+   npm run dev
    ```

 The frontend will be available at http://localhost:5173

+## Key Features
+
+### Intelligent Content Organization
+- **AI-Powered Tagging**: Automatic classification with geographic, topical, and source tags
+- **Hierarchical Filtering**: Multi-level filtering by location (country → region → city), categories, and content types
+- **Smart Search**: Advanced filtering with suggestions based on tag relationships and usage patterns
+- **Legacy Migration**: Seamless upgrade from simple country-based filtering to a comprehensive tag-based system
+
+### Advanced Analytics
+- **Reading Statistics**: Track reading time, completion rates, and engagement patterns
+- **Content Analytics**: Source performance, tag usage, and trending topics analysis
+- **Geographic Insights**: Location-based content distribution and reading preferences
+- **Goal Tracking**: Personal reading goals with progress monitoring
+
+### Flexible Article Display
+- **Compact View**: Title, excerpt, tags, and action buttons for quick browsing
+- **On-Demand Loading**: Full content, AI summaries, and source links as needed
+- **Visual Tag System**: Color-coded, hierarchical tags with click-to-filter functionality
+- **Reading Status**: Visual indicators for read/unread status and progress tracking
+
+### Enhanced Sharing
+- **Multiple Formats**: Text, Markdown, HTML, and JSON export options
+- **Custom Templates**: User-configurable sharing formats
+- **One-Click Operations**: Copy to clipboard with formatted content
+- **Privacy Controls**: Configurable information inclusion in shared content

 ## Building for Production

-### Building the Backend
+### Building the Rust Backend

-The backend can be deployed as a standard FastAPI application. You can use tools like Gunicorn with Uvicorn workers:
-
-```bash
-pip install gunicorn
-gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker
-```
+1. Navigate to the Rust backend directory:
+   ```bash
+   cd backend-rust
+   ```
+
+2. Build the optimized release binary:
+   ```bash
+   cargo build --release
+   ```
+
+The binary will be available at `target/release/owly-news`

 ### Building the Frontend

@@ -168,32 +156,62 @@ gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker

 2. Build the frontend:
    ```bash
-   yarn build
+   npm run build
    ```

 The built files will be in the `dist` directory and can be served by any static file server.

-## API Endpoints
+## Development

-The backend provides the following API endpoints:
+### Code Quality

-- `GET /news`: Get news articles with optional filtering
-- `GET /meta/last_sync`: Get the timestamp of the last feed synchronization
-- `POST /meta/cron`: Set the schedule for automatic feed synchronization
-- `GET /meta/feeds`: List all configured feeds
-- `POST /meta/feeds`: Add a new feed
-- `DELETE /meta/feeds`: Delete a feed
-- `GET /meta/model`: Check the status of the LLM model
-- `POST /meta/sync`: Manually trigger a feed synchronization
+The project includes comprehensive tooling for code quality:

-## Environment Variables
+**Frontend:**
+- ESLint with Vue and TypeScript support
+- Prettier for code formatting
+- Vitest for testing
+- TypeScript for type safety
+- Oxlint for additional linting

-### Backend
+**Backend (Rust):**
+- Standard Rust tooling (`cargo fmt`, `cargo clippy`)
+- SQLx for compile-time checked SQL queries

-- `OLLAMA_HOST`: URL for the Ollama service
-- `CRON_HOURS`: Interval for scheduled news fetching in hours
-- `DATABASE_URL`: SQLite database connection string
+### Testing

-## License
+Run frontend tests:
+```bash
+cd frontend
+npm run test
+```

-Code is licensed under [PolyForm Noncommercial 1.0.0](https://polyformproject.org/licenses/noncommercial/1.0.0/). For any commercial use, please get in touch.
+## Configuration
+
+The application uses a comprehensive configuration system via `config.toml`:
+
+- **AI Settings**: Configure Ollama integration for summaries and tagging
+- **Display Preferences**: Default views, themes, and UI customization
+- **Analytics**: Control data collection and retention policies
+- **Filtering**: Smart suggestions, saved filters, and geographic hierarchy
+- **Sharing**: Default formats and custom templates
+
+See the example configuration in the project for detailed options.
+
+## Migration from Legacy Systems
+
+The application includes automatic migration tools for upgrading from simpler filtering systems:
+
+- **Country Filter Migration**: Automatic conversion to hierarchical geographic tags
+- **Data Preservation**: Maintains historical data during migration
+- **Backward Compatibility**: Gradual transition with user control
+- **Validation Tools**: Ensure data integrity throughout the migration process
+
+## Future Roadmap
+
+The project is evolving through three phases:
+1. **Phase 1**: High-performance Rust backend with advanced filtering and analytics
+2. **Phase 2**: CLI application for power users and automation
+3. **Phase 3**: Migration to Dioxus for a full Rust stack
+
+See `ROADMAP.md` for detailed development plans and architectural decisions.
backend-rust/.gitignore (vendored, new file): 2 lines
@@ -0,0 +1,2 @@
target/
/config.toml
backend-rust/Cargo.lock (generated, new file): 2520 lines. Diff suppressed because it is too large.
backend-rust/Cargo.toml (new file): 37 lines
@@ -0,0 +1,37 @@
[workspace]
members = [
    "crates/api",
    "crates/server",
    "crates/cli",
    "crates/db",
]
resolver = "3"

[workspace.package]
edition = "2024"
version = "0.1.0"
rust-version = "1.89"

[workspace.dependencies]
anyhow = "1.0.99"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.142"
tokio = { version = "1.47.1", features = ["rt-multi-thread", "macros", "signal"] }
libloading = "0.8.8"
tracing = "0.1.41"
once_cell = "1.21.3"
toml = "0.9.5"
axum = "0.8.4"
sha2 = "0.10.9"
sqlx = { version = "0.8.6", default-features = false, features = ["runtime-tokio-rustls", "macros", "postgres", "uuid", "chrono", "migrate"] }
hex = "0.4.3"
num_cpus = "1.17.0"
unicode-segmentation = "1.12.0"
readability = "0.3.0"
tracing-subscriber = { version = "0.3.19", features = ["env-filter", "fmt"] }
scraper = "0.23.1"
dotenv = "0.15.0"

# dev/test utilities in the workspace
tokio-test = "0.4.4"
axum-test = "17.3.0"
backend-rust/ROADMAP.md (new file): 584 lines
@@ -0,0 +1,584 @@
# Owly News Summariser - Project Roadmap

This document outlines the strategic approach for transforming the project through three phases: Python-to-Rust backend migration, CLI application addition, and Vue-to-Dioxus frontend migration.

## Project Structure Strategy

### Current Phase: Axum API Setup
```
owly-news-summariser/
├── src/
│   ├── main.rs                       # Entry point (will evolve)
│   ├── db.rs                         # Database connection & SQLx setup
│   ├── api.rs                        # API module declaration
│   ├── api/                          # API-specific modules (no mod.rs needed)
│   │   ├── routes.rs                 # Route definitions
│   │   ├── middleware.rs             # Custom middleware
│   │   └── handlers.rs               # Request handlers & business logic
│   ├── models.rs                     # Models module declaration
│   ├── models/                       # Data models & database entities
│   │   ├── user.rs
│   │   ├── article.rs
│   │   ├── summary.rs
│   │   ├── tag.rs                    # Tag models and relationships
│   │   ├── analytics.rs              # Analytics and statistics models
│   │   └── settings.rs               # User settings and preferences
│   ├── services.rs                   # Services module declaration
│   ├── services/                     # Business logic layer
│   │   ├── news_service.rs
│   │   ├── summary_service.rs
│   │   ├── scraping_service.rs       # Article content extraction
│   │   ├── tagging_service.rs        # AI-powered tagging
│   │   ├── analytics_service.rs      # Reading stats and analytics
│   │   └── sharing_service.rs        # Article sharing functionality
│   └── config.rs                     # Configuration management
├── migrations/                       # SQLx migrations (managed by SQLx CLI)
├── frontend/                         # Keep existing Vue frontend for now
├── config.toml                       # Configuration file with AI settings
└── Cargo.toml
```

### Phase 2: Multi-Binary Structure (API + CLI)
```
owly-news-summariser/
├── src/
│   ├── lib.rs                        # Shared library code
│   ├── bin/
│   │   ├── server.rs                 # API server binary
│   │   └── cli.rs                    # CLI application binary
│   ├── [same module structure as Phase 1]
├── migrations/
├── frontend/
├── completions/                      # Shell completion scripts
│   ├── owly.bash
│   ├── owly.zsh
│   └── owly.fish
├── config.toml
└── Cargo.toml                        # Updated for multiple binaries
```

### Phase 3: Full Rust Stack
```
owly-news-summariser/
├── src/
│   ├── [same structure as Phase 2]
├── migrations/
├── frontend-dioxus/                  # New Dioxus frontend
├── frontend/                         # Legacy Vue (to be removed)
├── completions/
├── config.toml
└── Cargo.toml
```

## Core Features & Architecture

### Article Processing & Display Workflow
**Hybrid Approach: RSS Feeds + Manual Submissions with Smart Content Management**

1. **Article Collection**
   - RSS feed monitoring and batch processing
   - Manual article URL submission
   - Store original content and metadata in database

2. **Content Processing Pipeline**
   - Fetch RSS articles → scrape full content → store in DB
   - **Compact Article Display**:
     - Title (primary display)
     - RSS description text
     - Tags (visual indicators)
     - Time posted (from RSS)
     - Time added (when added to system)
     - Action buttons: [Full Article] [Summary] [Source]
   - **On-Demand Content Loading**:
     - Full Article: Display complete scraped content
     - Summary: Show AI-generated summary
     - Source: Open original URL in new tab
   - Background async processing with status updates
   - Support for re-processing without re-fetching

3. **Intelligent Tagging System**
   - **Automatic Tag Generation**: AI analyzes content and assigns relevant tags
   - **Geographic & Source Tags**: AI-generated location tags (countries, regions, cities) and publication source tags
   - **Content Category Tags**: Technology, Politics, Business, Sports, Health, etc.
   - **Visual Tag Display**: Color-coded tags in compact article view with hierarchical display
   - **Tag Filtering**: Quick filtering by clicking tags with smart suggestions
   - **Custom Tags**: User-defined tags and categories
   - **Tag Confidence**: Visual indicators for AI vs manual tags
   - **Tag Migration**: Automatic conversion of legacy country filters to geographic tags

4. **Analytics & Statistics System**
   - **Reading Analytics**:
     - Articles read vs added
     - Reading time tracking
     - Most read categories and tags
     - Reading patterns over time
   - **Content Analytics**:
     - Source reliability and quality metrics
     - Tag usage statistics
     - Processing success rates
     - Content freshness tracking
   - **Performance Metrics**:
     - AI processing times
     - Scraping success rates
     - User engagement patterns

5. **Advanced Filtering System**
   - **Multi-Criteria Filtering**:
     - By tags (single or multiple with AND/OR logic)
     - By geographic tags (country, region, city with hierarchical filtering)
     - By content categories and topics
     - By date ranges (posted, added, read)
     - By processing status (pending, completed, failed)
     - By content availability (scraped, summary, RSS-only)
     - By read/unread status
   - **Smart Filter Migration**: Automatic conversion of legacy country filters to tag-based equivalents
   - **Saved Filter Presets**:
     - Custom filter combinations
     - Quick access to frequent searches
     - Geographic preset templates (e.g., "European Tech News", "US Politics")
   - **Smart Suggestions**: Filter suggestions based on usage patterns and tag relationships

6. **Settings & Management System**
   - **User Preferences**:
     - Default article view mode
     - Tag display preferences with geographic hierarchy settings
     - Reading tracking settings
     - Notification preferences
   - **System Settings**:
     - AI configuration (via API and config file)
     - Processing settings
     - Display customization
     - Export preferences
   - **Content Management**:
     - Bulk operations (mark read, delete, retag)
     - Archive old articles
     - Export/import functionality
     - Legacy data migration tools

7. **Article Sharing System**
   - **Multiple Share Formats**:
     - Clean text format with title, summary, and source link
     - Markdown format for developers
     - Rich HTML format for email/web
     - JSON format for API integration
   - **Copy to Clipboard**: One-click formatted sharing
   - **Share Templates**: Customizable sharing formats
   - **Privacy Controls**: Control what information is included in shares

8. **Database Schema**
   ```
   Articles: id, title, url, source_type, rss_content, full_content,
             summary, processing_status, published_at, added_at, read_at,
             read_count, reading_time, ai_enabled, created_at, updated_at
   Tags: id, name, category, description, color, usage_count, parent_id, created_at
   ArticleTags: article_id, tag_id, confidence_score, ai_generated, created_at
   ReadingStats: user_id, article_id, read_at, reading_time, completion_rate
   FilterPresets: id, name, filter_criteria, user_id, created_at
   Settings: key, value, category, user_id, updated_at
   ShareTemplates: id, name, format, template_content, created_at
   LegacyMigration: old_filter_type, old_value, new_tag_ids, migrated_at
   ```
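To make the schema outline concrete, here is a minimal sketch of how the `Articles` and `Tags` rows could map onto SQLx models. Only the column names come from the outline above; the Rust field types are assumptions, since the migrations in `migrations/` remain the source of truth:

```rust
use sqlx::FromRow;

// Sketch only: timestamp and status types are assumptions until the
// SQLx migrations pin them down.
#[derive(Debug, FromRow)]
pub struct Article {
    pub id: i64,
    pub title: String,
    pub url: String,
    pub source_type: String,          // e.g. "rss" or "manual"
    pub rss_content: Option<String>,
    pub full_content: Option<String>,
    pub summary: Option<String>,
    pub processing_status: String,    // pending / completed / failed
    pub published_at: Option<String>,
    pub added_at: String,
    pub read_at: Option<String>,
    pub read_count: i64,
    pub reading_time: Option<i64>,
    pub ai_enabled: bool,
    pub created_at: String,
    pub updated_at: String,
}

#[derive(Debug, FromRow)]
pub struct Tag {
    pub id: i64,
    pub name: String,
    pub category: String,             // geographic / content / source / custom
    pub description: Option<String>,
    pub color: Option<String>,
    pub usage_count: i64,
    pub parent_id: Option<i64>,       // enables the hierarchical tag tree
    pub created_at: String,
}
```

With `FromRow` derived, `sqlx::query_as::<_, Article>(...)` can hydrate these structs directly from query results.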
## Step-by-Step Process

### Phase 1: Axum API Implementation

**Step 1: Core Infrastructure Setup**
- Set up database connection pooling with SQLx
- **Enhanced Configuration System**:
  - Extend config.toml with comprehensive settings
  - AI provider configurations with separate summary/tagging settings
  - Display preferences and UI customization
  - Analytics and tracking preferences
  - Sharing templates and formats
  - Filter and search settings
  - Geographic tagging preferences
- Establish error handling patterns with `anyhow`
- Set up logging and analytics infrastructure

**Step 2: Data Layer**
- Design comprehensive database schema with analytics and settings support
- Create SQLx migrations for all tables including analytics and user preferences
- Implement hierarchical tag system with geographic and content categories
- Add legacy migration support for country filters
- Implement article models with reading tracking and statistics
- Add settings and preferences data layer
- Create analytics data models and aggregation queries
- Implement sharing templates and format management
- Use SQLx's compile-time checked queries

**Step 3: Enhanced Services Layer**
- **Content Processing Services**:
  - RSS feed fetching and parsing
  - Web scraping with quality tracking
  - AI services for summary and tagging
- **Enhanced Tagging Service**:
  - Geographic location detection and tagging
  - Content category classification
  - Hierarchical tag relationships
  - Legacy filter migration logic
- **Analytics Service**:
  - Reading statistics collection and aggregation
  - Content performance metrics
  - User behavior tracking
  - Trend analysis and insights
- **Settings Management Service**:
  - User preference handling
  - System configuration management
  - Real-time settings updates
- **Sharing Service**:
  - Multiple format generation
  - Template processing
  - Privacy-aware content filtering
- **Advanced Filtering Service**:
  - Complex query building with geographic hierarchy
  - Filter preset management
  - Search optimization
  - Legacy filter migration

**Step 4: Comprehensive API Layer** (a route-wiring sketch follows this list)
- **Article Management Routes**:
  - `GET /api/articles` - List articles with compact display data
  - `POST /api/articles` - Submit manual article URL
  - `GET /api/articles/:id` - Get basic article info
  - `GET /api/articles/:id/full` - Get complete scraped content
  - `GET /api/articles/:id/summary` - Get AI summary
  - `POST /api/articles/:id/read` - Mark as read and track reading time
  - `POST /api/articles/:id/share` - Generate shareable content
- **Analytics Routes**:
  - `GET /api/analytics/dashboard` - Main analytics dashboard data
  - `GET /api/analytics/reading-stats` - Personal reading statistics
  - `GET /api/analytics/content-stats` - Content and source analytics
  - `GET /api/analytics/trends` - Trending topics and patterns
  - `GET /api/analytics/export` - Export analytics data
- **Enhanced Filtering & Search Routes**:
  - `GET /api/filters/presets` - Get saved filter presets
  - `POST /api/filters/presets` - Save new filter preset
  - `GET /api/search/suggestions` - Get search and filter suggestions
  - `POST /api/search` - Advanced search with multiple criteria
  - `POST /api/filters/migrate` - Migrate legacy country filters to tags
- **Settings Routes**:
  - `GET /api/settings` - Get all user settings
  - `PUT /api/settings` - Update user settings
  - `GET /api/settings/system` - Get system configuration
  - `PUT /api/settings/system` - Update system settings (admin)
- **Enhanced Tag Management Routes**:
  - `GET /api/tags` - List tags with usage statistics and hierarchy
  - `GET /api/tags/geographic` - Get geographic tag hierarchy
  - `GET /api/tags/trending` - Get trending tags
  - `POST /api/tags/:id/follow` - Follow/unfollow tag for notifications
  - `GET /api/tags/categories` - Get tag categories and relationships
- **Sharing Routes**:
  - `GET /api/share/templates` - Get sharing templates
  - `POST /api/share/templates` - Create custom sharing template
  - `POST /api/articles/:id/share/:format` - Generate share content
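A hedged route-wiring sketch for a few of the endpoints above, using the Axum 0.8 pinned in this workspace. Note that Axum 0.8 writes path parameters as `{id}` rather than the `:id` notation used in the list, and the handlers here are empty placeholders, not code from this PR:

```rust
use axum::{routing::get, Router};
use sqlx::SqlitePool;

// Placeholder handlers: only the wiring pattern matters here.
async fn list_articles() {}
async fn submit_article() {}
async fn full_article() {}
async fn analytics_dashboard() {}

pub fn api_routes() -> Router<SqlitePool> {
    Router::new()
        .route("/articles", get(list_articles).post(submit_article))
        // Axum 0.8 path parameters are written "{id}", not ":id".
        .route("/articles/{id}/full", get(full_article))
        .route("/analytics/dashboard", get(analytics_dashboard))
}
```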
**Step 5: Enhanced Frontend Features**
- **Compact Article Display**:
  - Card-based layout with title, RSS excerpt, tags, and timestamps
  - Action buttons for Full Article, Summary, and Source
  - Hierarchical tag display with geographic and category indicators
  - Reading status and progress indicators
- **Advanced Analytics Dashboard**:
  - Reading statistics with charts and trends
  - Content source performance metrics
  - Tag usage and trending topics with geographic breakdowns
  - Personal reading insights and goals
- **Comprehensive Filtering Interface**:
  - Multi-criteria filter builder with geographic hierarchy
  - Saved filter presets with quick access
  - Smart filter suggestions based on tag relationships
  - Visual filter indicators and clear actions
  - Legacy filter migration interface
- **Settings Management Panel**:
  - User preference configuration
  - AI and processing settings
  - Display and UI customization
  - Export/import functionality
- **Enhanced Sharing System**:
  - Quick share buttons with format selection
  - Copy-to-clipboard functionality
  - Custom sharing templates
  - Preview before sharing

**Step 6: Integration & Testing**
- Test all API endpoints with comprehensive coverage
- Test analytics collection and aggregation
- Test enhanced filtering and search functionality
- Test legacy filter migration
- Validate settings persistence and real-time updates
- Test sharing functionality across different formats
- Performance testing with large datasets and hierarchical tags
- Deploy and monitor

### Phase 2: CLI Application Addition

**Step 1: Restructure for Multiple Binaries**
- Move API code to `src/bin/server.rs`
- Create `src/bin/cli.rs` for CLI application
- Keep shared logic in `src/lib.rs`
- Update Cargo.toml to support multiple binaries

**Step 2: Enhanced CLI with Analytics and Management**
- **Core Commands**:
  - `owly list [--filters] [--format table|json|compact]` - List articles
  - `owly show <id> [--content|--summary]` - Display specific article
  - `owly read <id>` - Mark article as read and open in pager
  - `owly open <id>` - Open source URL in browser
- **Analytics Commands**:
  - `owly stats [--period day|week|month|year]` - Show reading statistics
  - `owly trends [--tags|--sources|--topics|--geo]` - Display trending content
  - `owly analytics export [--format csv|json]` - Export analytics data
- **Management Commands**:
  - `owly settings [--get key] [--set key=value]` - Manage settings
  - `owly filters [--list|--save name|--load name]` - Manage filter presets
  - `owly cleanup [--old|--unread|--failed]` - Clean up articles
  - `owly migrate [--from-country-filters]` - Migrate legacy data
- **Enhanced Filtering Commands**:
  - `owly filter [--tag] [--geo] [--category]` - Advanced filtering with geographic support
  - `owly tags [--list|--hierarchy|--geo]` - Tag management with geographic display
- **Sharing Commands**:
  - `owly share <id> [--format text|markdown|html]` - Generate share content
  - `owly export <id> [--template name] [--output file]` - Export article

**Step 3: Advanced CLI Features**
- Interactive filtering and search with geographic hierarchy
- Real-time analytics display with charts (using ASCII graphs)
- Bulk operations with progress indicators
- Settings management with validation
- Shell completion for all commands and parameters
- Legacy data migration tools

### Phase 3: Dioxus Frontend Migration

**Step 1: Component Architecture**
- **Core Display Components**:
  - `ArticleCard` - Compact article display with action buttons
  - `ArticleViewer` - Full article content display
  - `SummaryViewer` - AI summary display
  - `TagCloud` - Interactive tag display with geographic hierarchy
  - `GeographicMap` - Visual geographic filtering interface
- **Analytics Components**:
  - `AnalyticsDashboard` - Main analytics overview
  - `ReadingStats` - Personal reading statistics
  - `TrendChart` - Trending topics and patterns
  - `ContentMetrics` - Source and content analytics
  - `GeographicAnalytics` - Location-based content insights
- **Enhanced Filtering Components**:
  - `FilterBuilder` - Advanced filter creation interface with geographic support
  - `FilterPresets` - Saved filter management
  - `SearchBar` - Smart search with suggestions
  - `GeographicFilter` - Hierarchical location filtering
  - `MigrationTool` - Legacy filter migration interface
- **Settings Components**:
  - `SettingsPanel` - User preference management
  - `SystemConfig` - System-wide configuration
  - `ExportImport` - Data export/import functionality
- **Sharing Components**:
  - `ShareDialog` - Sharing interface with format options
  - `ShareTemplates` - Custom template management

**Step 2: Enhanced UX Features**
- **Smart Article Display**:
  - Lazy loading for performance
  - Infinite scroll with virtualization
  - Quick preview on hover
  - Keyboard navigation support
- **Advanced Analytics**:
  - Interactive charts and graphs with geographic data
  - Customizable dashboard widgets
  - Goal setting and progress tracking
  - Comparison and trend analysis
- **Intelligent Filtering**:
  - Auto-complete for filters with geographic suggestions
  - Visual filter builder with map integration
  - Filter combination suggestions based on tag relationships
  - Saved search notifications
- **Seamless Sharing**:
  - One-click sharing with clipboard integration
  - Live preview of shared content
  - Social media format optimization
  - Batch sharing capabilities

## Key Strategic Considerations

### 1. Performance & Scalability
- **Efficient Data Loading**: Lazy loading and pagination for large datasets
- **Optimized Queries**: Indexed database queries for filtering and analytics with hierarchical tag support
- **Caching Strategy**: Smart caching for frequently accessed content and tag hierarchies
- **Real-time Updates**: WebSocket integration for live analytics

### 2. User Experience Focus
- **Progressive Disclosure**: Show essential info first, details on demand
- **Responsive Design**: Optimized for mobile and desktop
- **Accessibility**: Full keyboard navigation and screen reader support
- **Customization**: User-configurable interface and behavior
- **Smooth Migration**: Seamless transition from country-based to tag-based filtering

### 3. Analytics & Insights
- **Privacy-First**: User control over data collection and retention
- **Actionable Insights**: Meaningful statistics that guide reading habits
- **Performance Metrics**: System health and efficiency tracking
- **Trend Analysis**: Pattern recognition for content and behavior with geographic context

### 4. Content Management
- **Flexible Display**: Multiple view modes for different use cases
- **Smart Organization**: AI-assisted content categorization with geographic awareness
- **Bulk Operations**: Efficient management of large article collections
- **Data Integrity**: Reliable content processing and error handling
- **Legacy Support**: Smooth migration from existing country-based filtering

## Enhanced Configuration File Structure

```toml
[server]
host = '127.0.0.1'
port = 8090

[display]
default_view = "compact"  # compact, full, summary
articles_per_page = 50
show_reading_time = true
show_word_count = false
highlight_unread = true
theme = "auto"  # light, dark, auto

[analytics]
enabled = true
track_reading_time = true
track_scroll_position = true
retention_days = 365  # How long to keep detailed analytics
aggregate_older_data = true

[filtering]
enable_smart_suggestions = true
max_recent_filters = 10
auto_save_filters = true
default_sort = "added_desc"  # added_desc, published_desc, title_asc
enable_geographic_hierarchy = true
auto_migrate_country_filters = true

[sharing]
default_format = "text"
include_summary = true
include_tags = true
include_source = true
copy_to_clipboard = true

[sharing.templates.text]
format = """
📰 {title}

{summary}

🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}

Shared via Owly News Summariser
"""

[sharing.templates.markdown]
format = """
# {title}

{summary}

**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}

---
*Shared via Owly News Summariser*
"""

[ai]
enabled = true
provider = "ollama"
timeout_seconds = 120

[ai.summary]
enabled = true
temperature = 0.1
max_tokens = 1000

[ai.tagging]
enabled = true
temperature = 0.3
max_tokens = 200
max_tags_per_article = 10
min_confidence_threshold = 0.7
enable_geographic_tagging = true
enable_category_tagging = true
geographic_hierarchy_levels = 3  # country, region, city

[scraping]
timeout_seconds = 30
max_retries = 3
max_content_length = 50000
respect_robots_txt = true
rate_limit_delay_ms = 1000

[processing]
batch_size = 10
max_concurrent = 5
retry_attempts = 3
priority_manual = true
auto_mark_read_on_view = false

[migration]
auto_convert_country_filters = true
preserve_legacy_data = true
migration_batch_size = 100

[cli]
default_output = "table"
pager_command = "less"
show_progress = true
auto_confirm_bulk = false
show_geographic_hierarchy = true
```
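A hedged sketch of loading a slice of this file with the `serde` and `toml` crates already declared in the workspace. The structs mirror only the `[server]` and `[ai]` sections; the remaining sections would follow the same pattern:

```rust
use serde::Deserialize;

// Partial config model: field names simply mirror the TOML keys above.
#[derive(Debug, Deserialize)]
struct Config {
    server: ServerSection,
    ai: AiSection,
}

#[derive(Debug, Deserialize)]
struct ServerSection {
    host: String,
    port: u16,
}

#[derive(Debug, Deserialize)]
struct AiSection {
    enabled: bool,
    provider: String,
    timeout_seconds: u64,
}

fn load_config(path: &std::path::Path) -> anyhow::Result<Config> {
    // toml::from_str fails with a descriptive error if a key is missing
    // or has the wrong type.
    let raw = std::fs::read_to_string(path)?;
    Ok(toml::from_str(&raw)?)
}
```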
## Migration Strategy for Country-Based Filtering

### Automatic Migration Process
1. **Data Analysis**: Scan existing country filter data and RSS feed origins
2. **Tag Generation**: Create geographic tags for each country with hierarchical structure
3. **Filter Conversion**: Convert country-based filters to tag-based equivalents
4. **User Notification**: Inform users about the migration and new capabilities
5. **Gradual Rollout**: Maintain backward compatibility during transition period

### Enhanced Geographic Features
- **Hierarchical Display**: Country → Region → City tag hierarchy
- **Visual Map Integration**: Interactive geographic filtering via map interface
- **Smart Suggestions**: Related location and content suggestions
- **Multi-Level Filtering**: Filter by specific cities, regions, or broader geographic areas
- **Source Intelligence**: AI detection of article geographic relevance beyond RSS origin

## Future Enhancements (Post Phase 3)

### Advanced Analytics
- **Machine Learning Insights**: Content recommendation based on reading patterns and geographic preferences
- **Predictive Analytics**: Trending topic prediction with geographic context
- **Behavioral Analysis**: Reading habit optimization suggestions
- **Comparative Analytics**: Benchmark against reading goals and regional averages

### Enhanced Content Management
- **Smart Collections**: AI-curated article collections with geographic themes
- **Reading Lists**: Planned reading with progress tracking
- **Content Relationships**: Related article suggestions with geographic relevance
- **Advanced Search**: Full-text search with relevance scoring and geographic weighting

### Social & Collaboration Features
- **Reading Groups**: Shared reading lists and discussions with geographic focus
- **Social Sharing**: Integration with social platforms
- **Collaborative Tagging**: Community-driven content organization
- **Reading Challenges**: Gamification of reading habits with geographic themes

### Integration & Extensibility
- **Browser Extension**: Seamless article saving and reading
- **Mobile Apps**: Native iOS/Android applications with location awareness
- **API Ecosystem**: Third-party integrations and plugins
- **Webhook System**: Real-time notifications and integrations with geographic filtering
backend-rust/TODO.md (new file): 72 lines
@@ -0,0 +1,72 @@
## CPU and resource limiting
- Tokio worker threads
  - Decide thread policy:
    - Option A: set TOKIO_WORKER_THREADS in the environment for deployments.
    - Option B: build a custom runtime with tokio::runtime::Builder::new_multi_thread().worker_threads(n).
  - Document your default policy (e.g., 50% of physical cores).

- Concurrency guard for CPU-heavy tasks
  - Create a global tokio::sync::Semaphore with N permits (N = allowed concurrent heavy tasks).
  - Acquire a permit before invoking heavy module operations; release automatically on drop.
  - Expose the semaphore in app state so handlers/jobs can share it (a sketch follows below).
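A minimal sketch of that guard, assuming a permit count chosen at startup; the `Semaphore` pattern itself is the point, not the surrounding types:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

/// Shared via app state; the permit count bounds concurrent heavy tasks.
pub struct CpuGuard {
    sem: Arc<Semaphore>,
}

impl CpuGuard {
    pub fn new(permits: usize) -> Self {
        Self { sem: Arc::new(Semaphore::new(permits)) }
    }

    pub async fn run<T>(&self, work: impl FnOnce() -> T) -> anyhow::Result<T> {
        // Waits when all permits are taken; the permit is an RAII guard,
        // so it is released even on a panic or an early `?` return.
        let _permit = self.sem.acquire().await?;
        // For truly blocking CPU work, tokio::task::spawn_blocking would
        // wrap `work` instead of calling it inline.
        Ok(work())
    }
}
```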
- HTTP backpressure and rate limiting (if using API)
  - Add tower::limit::ConcurrencyLimitLayer to cap in-flight requests.
  - Add tower::limit::RateLimitLayer or request-size/timeouts as needed.
  - Optionally add tower::timeout::TimeoutLayer to bound handler latency.

- Stronger isolation (optional, later)
  - Evaluate running certain modules as separate processes for strict CPU caps.
  - Use cgroups v2 (Linux) or Job Objects (Windows) to bound CPU/memory per process.
  - Reuse the same JSON interface over IPC (e.g., stdio or a local socket).

## Build and run
- Build all crates
  - Run: cargo build --workspace

- Build each plugin as cdylib
  - Example: cd crates/modules/summarizer && cargo build --release

- Stage plugin libraries for the host to find
  - Create a modules directory the daemon will read, e.g. target/modules
  - Copy the built artifact into that directory:
    - Linux: copy target/release/libsummarizer.so -> target/modules/libsummarizer.so
    - macOS: copy target/release/libsummarizer.dylib -> target/modules/libsummarizer.dylib
    - Windows: copy target/release/summarizer.dll -> target/modules/summarizer.dll

- Alternatively set OWLY_MODULES_DIR to your chosen directory.

- Run the daemon
  - cargo run -p owly-news
  - Optionally set:
    - OWLY_MODULES_DIR=/absolute/path/to/modules
    - TOKIO_WORKER_THREADS=N

## Wire into the API
- Share ModuleHost in app state
  - Create a struct AppState { host: Arc<ModuleHost>, cpu_sem: Arc<Semaphore>, ... }.
  - Add AppState to Axum with .with_state(state).

- In a handler (example: POST /summarize)
  - Parse payload as JSON.
  - Acquire a permit from cpu_sem before heavy work.
  - host.get("summarizer").await? to lazily load the module.
  - Call module.invoke_json("summarize", payload_value)?.
  - Map success to 200 with JSON; map errors to appropriate status codes.

- Error handling and observability
  - Use thiserror/anyhow to classify operational vs. client errors.
  - Add tracing spans around module loading and invocation; include module name and op.
  - Return structured error JSON when module reports an error.

- Configuration
  - Decide env vars and defaults: OWLY_MODULES_DIR, TOKIO_WORKER_THREADS, concurrency permits, rate limits.
  - Optionally add a config file (toml) and load via figment or config crate.

- Health and lifecycle
  - Add a /health route that checks:
    - Tokio is responsive.
    - Optional: preflight-check that required modules are present (or skip to keep lazy).

- Graceful shutdown: listen for SIGINT/SIGTERM and drain in-flight requests before exit (see the sketch below).
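A hedged sketch of that shutdown wiring using Axum's `with_graceful_shutdown`; `app` stands in for whatever router the server builds, and the Unix branch relies on the tokio `signal` feature already enabled in the workspace:

```rust
use tokio::net::TcpListener;
use tokio::signal;

async fn shutdown_signal() {
    // Resolve on Ctrl+C (SIGINT); on Unix, also resolve on SIGTERM.
    let ctrl_c = async { signal::ctrl_c().await.expect("install Ctrl+C handler") };

    #[cfg(unix)]
    let terminate = async {
        signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("install SIGTERM handler")
            .recv()
            .await;
    };
    #[cfg(not(unix))]
    let terminate = std::future::pending::<()>();

    tokio::select! {
        _ = ctrl_c => {},
        _ = terminate => {},
    }
}

pub async fn serve(app: axum::Router) -> anyhow::Result<()> {
    let listener = TcpListener::bind("127.0.0.1:8080").await?;
    // In-flight requests are drained before this future resolves.
    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await?;
    Ok(())
}
```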
backend-rust/crates/api/Cargo.lock (generated, new file): 7 lines
@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "api"
version = "0.1.0"
backend-rust/crates/api/Cargo.toml (new file): 18 lines
@@ -0,0 +1,18 @@
[package]
name = "api"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
once_cell = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
async-trait = "0.1.89"
axum = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }

[features]
default = []
backend-rust/crates/api/src/api.rs (new file): 3 lines
@@ -0,0 +1,3 @@
pub mod handlers;
pub mod middleware;
pub mod routes;
backend-rust/crates/api/src/api/handlers.rs (new file): 41 lines
@@ -0,0 +1,41 @@
use axum::Json;
use axum::extract::State;
use serde_json::Value;
use sqlx::SqlitePool;

pub async fn get_articles(State(pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Article logic
    Ok(Json(serde_json::json!({"articles": []})))
}

pub async fn get_summaries(State(pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Summaries logic
    Ok(Json(serde_json::json!({"summaries": []})))
}

use axum::{
    http::StatusCode,
    response::{IntoResponse, Response},
};

/// Wrapper that turns any `anyhow`-compatible error into a 500 response.
pub struct AppError(anyhow::Error);

impl IntoResponse for AppError {
    fn into_response(self) -> Response {
        // Database failures get a more specific message than generic errors.
        let (status, message) = match self.0.downcast_ref::<sqlx::Error>() {
            Some(_) => (StatusCode::INTERNAL_SERVER_ERROR, "Database error occurred"),
            None => (StatusCode::INTERNAL_SERVER_ERROR, "An error occurred"),
        };

        tracing::error!("API Error: {:?}", self.0);
        (status, message).into_response()
    }
}

// Blanket conversion: anything convertible to `anyhow::Error` becomes an
// `AppError`, which is what makes `?` ergonomic in the handlers above.
impl<E> From<E> for AppError
where
    E: Into<anyhow::Error>,
{
    fn from(err: E) -> Self {
        Self(err.into())
    }
}
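For context, a hedged sketch of how a future handler can lean on that blanket `From` impl: any `sqlx` failure converts through `anyhow::Error` into `AppError` via `?`. The `articles` table and this handler are illustrative only; neither is part of this commit:

```rust
use axum::{extract::State, Json};
use serde_json::Value;
use sqlx::SqlitePool;

// Hypothetical handler, assuming an `articles` table defined by the
// project's migrations.
pub async fn count_articles(State(pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    let row: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM articles")
        .fetch_one(&pool)
        .await?; // sqlx::Error -> anyhow::Error -> AppError
    Ok(Json(serde_json::json!({ "count": row.0 })))
}
```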
backend-rust/crates/api/src/api/middleware.rs (new file): 0 lines (empty placeholder)
backend-rust/crates/api/src/api/routes.rs (new file): 11 lines
@@ -0,0 +1,11 @@
use axum::Router;
use axum::routing::get;
use sqlx::SqlitePool;
use crate::api::handlers;

pub fn routes() -> Router<SqlitePool> {
    Router::new()
        .route("/articles", get(handlers::get_articles))
        .route("/summaries", get(handlers::get_summaries))
    // Add more routes as needed
}
backend-rust/crates/api/src/config.rs (new file): 1003 lines. Diff suppressed because it is too large.
backend-rust/crates/api/src/lib.rs (new file): 6 lines
@@ -0,0 +1,6 @@
//! API-first core: shared types, DTOs, service traits, configuration.

pub mod config;
pub mod types;
pub mod services;
pub mod api;
backend-rust/crates/api/src/services.rs (new file): 28 lines
@@ -0,0 +1,28 @@
use crate::types::Health;
use async_trait::async_trait;

// Submodules that host various domain services. These were refactored from the
// legacy root src folder into this workspace crate. Each component is its own module file.
pub mod summary_service;
pub mod news_service;
pub mod scraping_service;
pub mod tagging_service;
pub mod analytics_service;
pub mod sharing_service;
pub(crate) mod content_processor;

// Implement your service traits here. Example:
#[async_trait]
pub trait HealthService: Send + Sync {
    async fn health(&self) -> Health;
}

// A trivial default implementation that can be used by server and tests.
pub struct DefaultHealthService;

#[async_trait]
impl HealthService for DefaultHealthService {
    async fn health(&self) -> Health {
        Health { status: "ok".into() }
    }
}
backend-rust/crates/api/src/services/analytics_service.rs (new file): 4 lines
@@ -0,0 +1,4 @@
//! Analytics service module.
//! Implement logic for tracking and aggregating analytics here.

// Placeholder for analytics-related types and functions.
backend-rust/crates/api/src/services/content_processor.rs (new file): 3 lines
@@ -0,0 +1,3 @@
//! Content processor utilities shared by services.

// Placeholder module for content processing helpers (e.g., cleaning, tokenization).
backend-rust/crates/api/src/services/news_service.rs (new file): 4 lines
@@ -0,0 +1,4 @@
//! News service module.
//! Implement logic related to news retrieval/management here.

// Placeholder for news-related types and functions.
backend-rust/crates/api/src/services/scraping_service.rs (new file): 4 lines
@@ -0,0 +1,4 @@
//! Scraping service module.
//! Implement logic related to web scraping, fetchers, and extractors here.

// Placeholder for scraping-related types and functions.
backend-rust/crates/api/src/services/sharing_service.rs (new file): 4 lines
@@ -0,0 +1,4 @@
//! Sharing service module.
//! Implement logic related to content sharing here.

// Placeholder for sharing-related types and functions.
backend-rust/crates/api/src/services/summary_service.rs (new file): 4 lines
@@ -0,0 +1,4 @@
//! Summary service module.
//! Implement logic for generating summaries from articles here.

// Placeholder for summary-related types and functions.
backend-rust/crates/api/src/services/tagging_service.rs (new file): 4 lines
@@ -0,0 +1,4 @@
//! Tagging service module.
//! Implement logic related to tagging articles and managing tags here.

// Placeholder for tagging-related types and functions.
backend-rust/crates/api/src/types.rs (new file): 6 lines
@@ -0,0 +1,6 @@
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Health {
    pub status: String,
}
backend-rust/crates/cli/Cargo.toml (new file): 15 lines
@@ -0,0 +1,15 @@
[package]
name = "cli"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
dotenv = { workspace = true }

api = { path = "../api" }
server = { path = "../server" }
backend-rust/crates/cli/src/main.rs (new file): 70 lines
@@ -0,0 +1,70 @@
use anyhow::Result;
use api::config::Cli;
use dotenv::dotenv;
use std::{env, net::SocketAddr, str::FromStr};
use tokio::signal;

#[tokio::main]
async fn main() -> Result<()> {
    dotenv().ok();
    let args: Vec<String> = env::args().collect();

    match args.get(1).map(|s| s.as_str()) {
        Some("serve") => serve(args).await,
        Some("print-config") => print_config(),
        _ => {
            print_help();
            Ok(())
        }
    }
}

fn print_help() {
    eprintln!(
        "Usage:
  cli serve [--addr 0.0.0.0:8080]
  cli print-config

Environment:
  These may influence runtime behavior.

Notes:
  - 'serve' runs the HTTP server.
  - 'print-config' prints the default CLI configuration in JSON."
    );
}

async fn serve(args: Vec<String>) -> Result<()> {
    // naive flag parse: look for "--addr host:port"
    let mut addr: SocketAddr = SocketAddr::from_str("127.0.0.1:8080")?;
    let mut i = 2;
    while i + 1 < args.len() {
        if args[i] == "--addr" {
            addr = SocketAddr::from_str(&args[i + 1])?;
            i += 2;
        } else {
            i += 1;
        }
    }

    let server_task = tokio::spawn(async move { server::start_server(addr).await });

    // graceful shutdown via Ctrl+C
    tokio::select! {
        res = server_task => {
            res??;
        }
        _ = signal::ctrl_c() => {
            eprintln!("Shutting down...");
        }
    }

    Ok(())
}

fn print_config() -> Result<()> {
    let cfg = Cli::default();
    let json = serde_json::to_string_pretty(&cfg)?;
    println!("{json}");
    Ok(())
}
backend-rust/crates/db/Cargo.toml (new file): 10 lines
@@ -0,0 +1,10 @@
[package]
name = "db"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }
tracing = { workspace = true }
api = { path = "../api" }
backend-rust/crates/db/src/lib.rs (new file): 44 lines
@@ -0,0 +1,44 @@
use api::config::AppSettings;
use anyhow::{Context, Result};
use sqlx::migrate::Migrator;
use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
use sqlx::{Pool, Sqlite, SqlitePool};
use std::str::FromStr;
use std::time::Duration;
use tracing::info;

// Embed migrations from the workspace-level migrations directory.
// crates/db is two levels below backend-rust where migrations/ resides.
pub const MIGRATOR: Migrator = sqlx::migrate!("../../migrations");

pub async fn initialize_db(app_settings: &AppSettings) -> Result<Pool<Sqlite>> {
    app_settings
        .ensure_default_directory()
        .context("Failed to ensure default directory for database")?;

    let options = SqliteConnectOptions::from_str(&app_settings.database_url())?
        .create_if_missing(true)
        .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
        .foreign_keys(true);

    let pool = SqlitePoolOptions::new()
        .max_connections(20)
        .min_connections(5)
        .acquire_timeout(Duration::from_secs(30))
        .idle_timeout(Duration::from_secs(600))
        .connect_with(options)
        .await?;

    MIGRATOR
        .run(&pool)
        .await
        .with_context(|| "Database migrations failed")?;
    info!("Database migrations completed successfully");

    Ok(pool)
}

pub async fn create_pool(opts: SqliteConnectOptions) -> Result<SqlitePool> {
    let pool = SqlitePool::connect_with(opts).await?;
    Ok(pool)
}
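As a usage note, `create_pool` pairs naturally with an in-memory database for hermetic tests. The sketch below is illustrative and not part of the commit:

```rust
use sqlx::sqlite::SqliteConnectOptions;
use std::str::FromStr;

#[tokio::test]
async fn pool_smoke_test() -> anyhow::Result<()> {
    // "sqlite::memory:" is standard SQLite syntax for an in-memory database.
    let opts = SqliteConnectOptions::from_str("sqlite::memory:")?;
    let pool = db::create_pool(opts).await?;
    sqlx::query("SELECT 1").execute(&pool).await?;
    Ok(())
}
```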
backend-rust/crates/server/Cargo.toml (new file): 23 lines
@@ -0,0 +1,23 @@
[package]
name = "server"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
axum = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }
dotenv = { workspace = true }
once_cell = { workspace = true }

api = { path = "../api" }
db = { path = "../db" }
http = "1.3.1"

[features]
default = []
63
backend-rust/crates/server/src/lib.rs
Normal file
@@ -0,0 +1,63 @@
use axum::{routing::get, Json, Router};
use std::{net::SocketAddr, sync::Arc};
use tokio::net::TcpListener;
use tracing::{info, level_filters::LevelFilter};
use tracing_subscriber::EnvFilter;

use api::services::{DefaultHealthService, HealthService};
use api::types::Health;
use api::config::AppSettings;

pub struct AppState {
    pub health_service: Arc<dyn HealthService>,
}

pub async fn build_router(state: Arc<AppState>) -> Router {
    Router::new().route(
        "/health",
        get({
            let state = state.clone();
            move || health_handler(state.clone())
        }),
    )
}

async fn health_handler(state: Arc<AppState>) -> Json<Health> {
    let res = state.health_service.health().await;
    Json(res)
}

pub async fn start_server(addr: SocketAddr) -> anyhow::Result<()> {
    init_tracing();

    // Load application settings and initialize the database pool (sqlite).
    let app_settings = AppSettings::get_app_settings();
    let pool = db::initialize_db(&app_settings).await?;

    let state = Arc::new(AppState {
        health_service: Arc::new(DefaultHealthService),
    });

    // Base daemon router
    let app = build_router(state)
        .await
        // Attach API under /api and provide DB state
        .nest("/api", api::api::routes::routes().with_state(pool.clone()));

    let listener = TcpListener::bind(addr).await?;
    info!("HTTP server listening on http://{}", addr);
    axum::serve(listener, app).await?;
    Ok(())
}

fn init_tracing() {
    let env_filter = EnvFilter::try_from_default_env()
        .or_else(|_| EnvFilter::try_new("info"))
        .unwrap()
        .add_directive(LevelFilter::INFO.into());

    tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_target(true)
        .compact()
        .init();
}
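As a sketch of what a route behind the `/api` nest can look like once it receives the `SqlitePool` passed via `.with_state(pool)` — the handler and path here are hypothetical; the real routes live in `api::api::routes`:

use axum::{extract::State, routing::get, Json, Router};
use sqlx::SqlitePool;

// Hypothetical handler: counts rows using the pool provided as router state.
async fn count_news(State(pool): State<SqlitePool>) -> Json<i64> {
    let (count,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM news")
        .fetch_one(&pool)
        .await
        .unwrap_or((0,));
    Json(count)
}

// A Router parameterized over SqlitePool until `.with_state` supplies it.
fn api_routes() -> Router<SqlitePool> {
    Router::new().route("/news/count", get(count_news))
}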
22
backend-rust/crates/server/tests/health.rs
Normal file
@@ -0,0 +1,22 @@
use axum::Router;
use server::{build_router, AppState};
use api::services::DefaultHealthService;
use std::sync::Arc;

#[tokio::test]
async fn health_ok() {
    let state = Arc::new(AppState {
        health_service: Arc::new(DefaultHealthService),
    });

    let app: Router = build_router(state).await;

    let req = http::Request::builder()
        .uri("/health")
        .body(axum::body::Body::empty())
        .unwrap();

    // Drive the router directly with tower's `oneshot` (assumes `tower` is
    // available as a dev-dependency; axum already depends on it internally).
    use tower::ServiceExt;
    let res = app.oneshot(req).await.unwrap();
    assert_eq!(res.status(), http::StatusCode::OK);
}
31
backend-rust/example.env
Normal file
@@ -0,0 +1,31 @@
# URL for the Ollama service
OLLAMA_HOST=http://localhost:11434

# Interval for scheduled news fetching in hours
CRON_HOURS=1

# Minimum interval for scheduled news fetching in hours
MIN_CRON_HOURS=0.5

# Cooldown period in minutes between manual syncs
SYNC_COOLDOWN_MINUTES=30

# LLM model to use for summarization (only the last uncommented assignment takes effect)
#LLM_MODEL=qwen2:7b-instruct-q4_K_M
#LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
LLM_MODEL=mistral-nemo:12b

# Timeout in seconds for LLM requests
LLM_TIMEOUT_SECONDS=180

# Timeout in seconds for Ollama API requests
OLLAMA_API_TIMEOUT_SECONDS=10

# Timeout in seconds for article fetching
ARTICLE_FETCH_TIMEOUT=30

# Maximum length of article content to process
MAX_ARTICLE_LENGTH=5000

# SQLite database connection string
DB_NAME=owlynews.sqlite3
5
backend-rust/migrations/001_initial_schema.down.sql
Normal file
@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS meta;
DROP TABLE IF EXISTS settings;
DROP TABLE IF EXISTS feeds;
DROP INDEX IF EXISTS idx_news_published;
DROP TABLE IF EXISTS news;
38
backend-rust/migrations/001_initial_schema.up.sql
Normal file
@@ -0,0 +1,38 @@
-- Initial database schema for Owly News Summariser

-- News table to store articles
CREATE TABLE IF NOT EXISTS news
(
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    title      TEXT NOT NULL,
    summary    TEXT,
    url        TEXT NOT NULL,
    published  TIMESTAMP NOT NULL,
    country    TEXT NOT NULL,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);

-- Index for faster queries on published date
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);

-- Feeds table to store RSS feed sources
CREATE TABLE IF NOT EXISTS feeds
(
    id      INTEGER PRIMARY KEY,
    country TEXT,
    url     TEXT UNIQUE NOT NULL
);

-- Settings table for application configuration
CREATE TABLE IF NOT EXISTS settings
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);

-- Meta table for application metadata
CREATE TABLE IF NOT EXISTS meta
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);
18
backend-rust/migrations/002_add_category_to_news.down.sql
Normal file
@@ -0,0 +1,18 @@
CREATE TABLE news_backup
(
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    title      TEXT NOT NULL,
    summary    TEXT,
    url        TEXT NOT NULL,
    published  TIMESTAMP NOT NULL,
    country    TEXT NOT NULL,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);

INSERT INTO news_backup
SELECT id, title, summary, url, published, country, created_at
FROM news;
DROP TABLE news;
ALTER TABLE news_backup
    RENAME TO news;
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);
3
backend-rust/migrations/002_add_category_to_news.up.sql
Normal file
@@ -0,0 +1,3 @@
-- Add category field to news table
ALTER TABLE news
    ADD COLUMN category TEXT;
7
backend-rust/migrations/003_create_articles_table.down.sql
Normal file
@@ -0,0 +1,7 @@
-- Drop articles table and its indexes
DROP INDEX IF EXISTS idx_articles_read_at;
DROP INDEX IF EXISTS idx_articles_source_type;
DROP INDEX IF EXISTS idx_articles_processing_status;
DROP INDEX IF EXISTS idx_articles_added_at;
DROP INDEX IF EXISTS idx_articles_published_at;
DROP TABLE IF EXISTS articles;
27
backend-rust/migrations/003_create_articles_table.up.sql
Normal file
@@ -0,0 +1,27 @@
-- Create enhanced articles table to replace news table structure
CREATE TABLE IF NOT EXISTS articles
(
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    title             TEXT NOT NULL,
    url               TEXT NOT NULL,
    source_type       TEXT NOT NULL DEFAULT 'rss',     -- 'rss', 'manual'
    rss_content       TEXT,                            -- RSS description/excerpt
    full_content      TEXT,                            -- Scraped full content
    summary           TEXT,                            -- AI-generated summary
    processing_status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'processing', 'completed', 'failed'
    published_at      TIMESTAMP NOT NULL,
    added_at          TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    read_at           TIMESTAMP,
    read_count        INTEGER NOT NULL DEFAULT 0,
    reading_time      INTEGER,                         -- in seconds
    ai_enabled        BOOLEAN NOT NULL DEFAULT 1,
    created_at        TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    updated_at        TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create indexes for performance
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles (published_at);
CREATE INDEX IF NOT EXISTS idx_articles_added_at ON articles (added_at);
CREATE INDEX IF NOT EXISTS idx_articles_processing_status ON articles (processing_status);
CREATE INDEX IF NOT EXISTS idx_articles_source_type ON articles (source_type);
CREATE INDEX IF NOT EXISTS idx_articles_read_at ON articles (read_at);
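A sketch of how the defaults in this table play out when inserting a manually added article — an assumed helper, not part of the migration; `processing_status`, `added_at`, the timestamps, and `ai_enabled` all fall back to their declared defaults:

use sqlx::SqlitePool;

// Hypothetical helper: only title, url, source_type and published_at are supplied;
// every other column is filled by the DEFAULT clauses above.
pub async fn add_manual_article(pool: &SqlitePool, title: &str, url: &str) -> anyhow::Result<i64> {
    let id = sqlx::query(
        "INSERT INTO articles (title, url, source_type, published_at)
         VALUES (?, ?, 'manual', datetime('now'))",
    )
    .bind(title)
    .bind(url)
    .execute(pool)
    .await?
    .last_insert_rowid();
    Ok(id)
}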
9
backend-rust/migrations/004_create_tags_table.down.sql
Normal file
@@ -0,0 +1,9 @@
-- Drop tag system tables and indexes
DROP INDEX IF EXISTS idx_article_tags_ai_generated;
DROP INDEX IF EXISTS idx_article_tags_tag_id;
DROP INDEX IF EXISTS idx_article_tags_article_id;
DROP INDEX IF EXISTS idx_tags_usage_count;
DROP INDEX IF EXISTS idx_tags_parent_id;
DROP INDEX IF EXISTS idx_tags_category;
DROP TABLE IF EXISTS article_tags;
DROP TABLE IF EXISTS tags;
31
backend-rust/migrations/004_create_tags_table.up.sql
Normal file
@@ -0,0 +1,31 @@
-- Create tags table with hierarchical support
CREATE TABLE IF NOT EXISTS tags
(
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    name        TEXT NOT NULL UNIQUE,
    category    TEXT NOT NULL,                -- 'geographic', 'content', 'source', 'custom'
    description TEXT,
    color       TEXT,                         -- Hex color for UI display
    usage_count INTEGER NOT NULL DEFAULT 0,
    parent_id   INTEGER REFERENCES tags (id), -- For hierarchical tags (e.g., Country -> Region -> City)
    created_at  TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create article_tags junction table
CREATE TABLE IF NOT EXISTS article_tags
(
    article_id       INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
    tag_id           INTEGER NOT NULL REFERENCES tags (id) ON DELETE CASCADE,
    confidence_score REAL DEFAULT 1.0,        -- AI confidence (0.0-1.0)
    ai_generated     BOOLEAN NOT NULL DEFAULT 0,
    created_at       TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (article_id, tag_id)
);

-- Create indexes
CREATE INDEX IF NOT EXISTS idx_tags_category ON tags (category);
CREATE INDEX IF NOT EXISTS idx_tags_parent_id ON tags (parent_id);
CREATE INDEX IF NOT EXISTS idx_tags_usage_count ON tags (usage_count DESC);
CREATE INDEX IF NOT EXISTS idx_article_tags_article_id ON article_tags (article_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_tag_id ON article_tags (tag_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_ai_generated ON article_tags (ai_generated);
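The `parent_id` column is what enables hierarchical filtering (e.g. Country -> Region -> City). A sketch of resolving a tag together with all of its descendants via a recursive CTE — an assumed query for illustration, not part of the migration:

use sqlx::SqlitePool;

// Hypothetical lookup: article ids tagged with `root_tag` or any descendant tag.
pub async fn article_ids_for_tag_tree(pool: &SqlitePool, root_tag: i64) -> anyhow::Result<Vec<i64>> {
    let rows: Vec<(i64,)> = sqlx::query_as(
        "WITH RECURSIVE tag_tree(id) AS (
             SELECT ?
             UNION ALL
             SELECT t.id FROM tags t JOIN tag_tree tt ON t.parent_id = tt.id
         )
         SELECT DISTINCT at.article_id
         FROM article_tags at
         JOIN tag_tree tt ON at.tag_id = tt.id",
    )
    .bind(root_tag)
    .fetch_all(pool)
    .await?;
    Ok(rows.into_iter().map(|(id,)| id).collect())
}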
11
backend-rust/migrations/005_create_statistics_table.down.sql
Normal file
@@ -0,0 +1,11 @@
-- Drop analytics system tables and indexes
DROP INDEX IF EXISTS idx_legacy_migration_old_filter_type;
DROP INDEX IF EXISTS idx_share_templates_format;
DROP INDEX IF EXISTS idx_filter_presets_user_id;
DROP INDEX IF EXISTS idx_reading_stats_read_at;
DROP INDEX IF EXISTS idx_reading_stats_article_id;
DROP INDEX IF EXISTS idx_reading_stats_user_id;
DROP TABLE IF EXISTS legacy_migration;
DROP TABLE IF EXISTS share_templates;
DROP TABLE IF EXISTS filter_presets;
DROP TABLE IF EXISTS reading_stats;
50
backend-rust/migrations/005_create_statistics_table.up.sql
Normal file
@@ -0,0 +1,50 @@
-- Create reading statistics table
CREATE TABLE IF NOT EXISTS reading_stats
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id         INTEGER DEFAULT 1, -- For future multi-user support
    article_id      INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
    read_at         TIMESTAMP NOT NULL,
    reading_time    INTEGER,           -- in seconds
    completion_rate REAL DEFAULT 1.0,  -- 0.0-1.0, how much of the article was read
    created_at      TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create filter presets table
CREATE TABLE IF NOT EXISTS filter_presets
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    name            TEXT NOT NULL,
    filter_criteria TEXT NOT NULL,     -- JSON string of filter parameters
    user_id         INTEGER DEFAULT 1, -- For future multi-user support
    created_at      TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create share templates table
CREATE TABLE IF NOT EXISTS share_templates
(
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    name             TEXT NOT NULL,
    format           TEXT NOT NULL,    -- 'text', 'markdown', 'html', 'json'
    template_content TEXT NOT NULL,
    is_default       BOOLEAN NOT NULL DEFAULT 0,
    created_at       TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create legacy migration tracking table
CREATE TABLE IF NOT EXISTS legacy_migration
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    old_filter_type TEXT NOT NULL,     -- 'country', 'category', etc.
    old_value       TEXT NOT NULL,
    new_tag_ids     TEXT,              -- JSON array of tag IDs
    migrated_at     TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create indexes
CREATE INDEX IF NOT EXISTS idx_reading_stats_user_id ON reading_stats (user_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_article_id ON reading_stats (article_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_read_at ON reading_stats (read_at);
CREATE INDEX IF NOT EXISTS idx_filter_presets_user_id ON filter_presets (user_id);
CREATE INDEX IF NOT EXISTS idx_share_templates_format ON share_templates (format);
CREATE INDEX IF NOT EXISTS idx_legacy_migration_old_filter_type ON legacy_migration (old_filter_type);
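A sketch of the kind of aggregate the analytics feature can build on top of `reading_stats` — an assumed query, shown only to illustrate the schema:

use sqlx::SqlitePool;

// Hypothetical report: total reading time (seconds) per day over the last week.
pub async fn reading_time_last_7_days(pool: &SqlitePool) -> anyhow::Result<Vec<(String, i64)>> {
    let rows: Vec<(String, i64)> = sqlx::query_as(
        "SELECT date(read_at) AS day, COALESCE(SUM(reading_time), 0)
         FROM reading_stats
         WHERE read_at >= datetime('now', '-7 days')
         GROUP BY day
         ORDER BY day",
    )
    .fetch_all(pool)
    .await?;
    Ok(rows)
}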
18
backend-rust/migrations/006_update_settings_table.down.sql
Normal file
@@ -0,0 +1,18 @@
-- Remove enhanced settings columns and indexes
DROP INDEX IF EXISTS idx_settings_user_id;
DROP INDEX IF EXISTS idx_settings_category;

-- Note: SQLite doesn't support DROP COLUMN, so we recreate the table
CREATE TABLE settings_backup AS
SELECT key, val
FROM settings;
DROP TABLE settings;
CREATE TABLE settings
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);
INSERT INTO settings
SELECT key, val
FROM settings_backup;
DROP TABLE settings_backup;
74
backend-rust/migrations/006_update_settings_table.up.sql
Normal file
@@ -0,0 +1,74 @@
-- Enhance settings table to support more structured configuration
ALTER TABLE settings
    ADD COLUMN category TEXT DEFAULT 'general';
ALTER TABLE settings
    ADD COLUMN user_id INTEGER DEFAULT 1;
ALTER TABLE settings
    ADD COLUMN updated_at TIMESTAMP DEFAULT (datetime('now'));

-- Create index for better performance
CREATE INDEX IF NOT EXISTS idx_settings_category ON settings (category);
CREATE INDEX IF NOT EXISTS idx_settings_user_id ON settings (user_id);

-- Insert default settings based on roadmap configuration
INSERT OR IGNORE INTO settings (key, val, category)
VALUES
    -- Display settings
    ('default_view', 'compact', 'display'),
    ('articles_per_page', '50', 'display'),
    ('show_reading_time', '1', 'display'),
    ('show_word_count', '0', 'display'),
    ('highlight_unread', '1', 'display'),
    ('theme', 'auto', 'display'),

    -- Analytics settings
    ('analytics_enabled', '1', 'analytics'),
    ('track_reading_time', '1', 'analytics'),
    ('track_scroll_position', '1', 'analytics'),
    ('retention_days', '365', 'analytics'),
    ('aggregate_older_data', '1', 'analytics'),

    -- Filtering settings
    ('enable_smart_suggestions', '1', 'filtering'),
    ('max_recent_filters', '10', 'filtering'),
    ('auto_save_filters', '1', 'filtering'),
    ('default_sort', 'added_desc', 'filtering'),
    ('enable_geographic_hierarchy', '1', 'filtering'),
    ('auto_migrate_country_filters', '1', 'filtering'),

    -- Sharing settings
    ('default_share_format', 'text', 'sharing'),
    ('include_summary', '1', 'sharing'),
    ('include_tags', '1', 'sharing'),
    ('include_source', '1', 'sharing'),
    ('copy_to_clipboard', '1', 'sharing'),

    -- AI settings
    ('ai_enabled', '1', 'ai'),
    ('ai_provider', 'ollama', 'ai'),
    ('ai_timeout_seconds', '120', 'ai'),
    ('ai_summary_enabled', '1', 'ai'),
    ('ai_summary_temperature', '0.1', 'ai'),
    ('ai_summary_max_tokens', '1000', 'ai'),
    ('ai_tagging_enabled', '1', 'ai'),
    ('ai_tagging_temperature', '0.3', 'ai'),
    ('ai_tagging_max_tokens', '200', 'ai'),
    ('max_tags_per_article', '10', 'ai'),
    ('min_confidence_threshold', '0.7', 'ai'),
    ('enable_geographic_tagging', '1', 'ai'),
    ('enable_category_tagging', '1', 'ai'),
    ('geographic_hierarchy_levels', '3', 'ai'),

    -- Scraping settings
    ('scraping_timeout_seconds', '30', 'scraping'),
    ('scraping_max_retries', '3', 'scraping'),
    ('max_content_length', '50000', 'scraping'),
    ('respect_robots_txt', '1', 'scraping'),
    ('rate_limit_delay_ms', '1000', 'scraping'),

    -- Processing settings
    ('batch_size', '10', 'processing'),
    ('max_concurrent', '5', 'processing'),
    ('retry_attempts', '3', 'processing'),
    ('priority_manual', '1', 'processing'),
    ('auto_mark_read_on_view', '0', 'processing');
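Since every value lands in the TEXT `val` column, callers have to parse it themselves. A sketch of a typed read with a fallback — an assumed helper; the key and default come from the seed data above:

use sqlx::SqlitePool;

// Hypothetical accessor: falls back to the seeded default (50) on any failure.
pub async fn articles_per_page(pool: &SqlitePool) -> i64 {
    sqlx::query_scalar::<_, String>("SELECT val FROM settings WHERE key = 'articles_per_page'")
        .fetch_optional(pool)
        .await
        .ok()
        .flatten()
        .and_then(|v| v.parse().ok())
        .unwrap_or(50)
}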
@@ -0,0 +1,39 @@
-- Remove migrated data (this will remove all articles and tags created from migration)
-- WARNING: This will delete all migrated data

-- Remove legacy migration records
DELETE
FROM legacy_migration
WHERE old_filter_type IN ('country', 'category');

-- Remove article-tag associations for migrated data (non-AI generated)
DELETE
FROM article_tags
WHERE ai_generated = 0;

-- Remove migrated geographic tags (only those created from country data)
DELETE
FROM tags
WHERE tags.category = 'geographic'
  AND EXISTS (SELECT 1 FROM news WHERE news.country = tags.name);

-- Remove migrated content tags (only those created from category data)
DELETE
FROM tags
WHERE tags.category = 'content'
  AND EXISTS (SELECT 1 FROM news WHERE news.category = tags.name);

-- Remove migrated articles (only those that match news entries)
DELETE
FROM articles
WHERE EXISTS (SELECT 1
              FROM news
              WHERE news.url = articles.url
                AND news.title = articles.title
                AND articles.source_type = 'rss');

-- Reset tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
                   FROM article_tags
                   WHERE tag_id = tags.id);
@@ -0,0 +1,84 @@

-- Migrate data from old news table to new articles table
INSERT INTO articles (title, url, summary, published_at, added_at, source_type, processing_status)
SELECT title,
       url,
       summary,
       published,
       datetime(created_at, 'unixepoch'),
       'rss',
       CASE
           WHEN summary IS NOT NULL AND summary != '' THEN 'completed'
           ELSE 'pending'
           END
FROM news;

-- Create geographic tags from existing country data
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT country,
                'geographic',
                'Geographic location: ' || country,
                COUNT(*)
FROM news
WHERE country IS NOT NULL
  AND country != ''
GROUP BY country;

-- Link articles to their geographic tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
       t.id,
       0,  -- Not AI generated, migrated from legacy data
       1.0 -- Full confidence for existing data
FROM articles a
         JOIN news n ON a.url = n.url AND a.title = n.title
         JOIN tags t ON t.name = n.country AND t.category = 'geographic'
WHERE n.country IS NOT NULL
  AND n.country != '';

-- Create category tags if category column exists in news table
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT n.category,
                'content',
                'Content category: ' || n.category,
                COUNT(*)
FROM news n
WHERE n.category IS NOT NULL
  AND n.category != ''
GROUP BY n.category;

-- Link articles to their category tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
       t.id,
       0,  -- Not AI generated, migrated from legacy data
       1.0 -- Full confidence for existing data
FROM articles a
         JOIN news n ON a.url = n.url AND a.title = n.title
         JOIN tags t ON t.name = n.category AND t.category = 'content'
WHERE n.category IS NOT NULL
  AND n.category != '';

-- Record migration in legacy_migration table for countries
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'country',
       n.country,
       '[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT country FROM news WHERE country IS NOT NULL AND country != '') n
         JOIN tags t ON t.name = n.country AND t.category = 'geographic'
GROUP BY n.country;

-- Record migration in legacy_migration table for categories (if they exist)
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'category',
       n.category,
       '[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT category FROM news WHERE category IS NOT NULL AND category != '') n
         JOIN tags t ON t.name = n.category AND t.category = 'content'
GROUP BY n.category;

-- Update tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
                   FROM article_tags
                   WHERE tag_id = tags.id);
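A sketch of a post-migration sanity check — an assumed verification step, not part of the migration itself; it relies on the first INSERT above copying every `news` row into `articles` with `source_type = 'rss'`:

use sqlx::SqlitePool;

// Hypothetical check: the migrated article count must cover the legacy news count.
pub async fn verify_migration(pool: &SqlitePool) -> anyhow::Result<()> {
    let (news,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM news")
        .fetch_one(pool)
        .await?;
    let (articles,): (i64,) = sqlx::query_as(
        "SELECT COUNT(*) FROM articles WHERE source_type = 'rss'",
    )
    .fetch_one(pool)
    .await?;
    anyhow::ensure!(articles >= news, "expected every news row to be migrated");
    Ok(())
}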
@@ -0,0 +1,4 @@
-- Remove default sharing templates
DELETE
FROM share_templates
WHERE name IN ('Default Text', 'Markdown', 'Simple Text', 'HTML Email');
@@ -0,0 +1,39 @@
-- Insert default sharing templates
INSERT INTO share_templates (name, format, template_content, is_default)
VALUES ('Default Text', 'text', '📰 {title}

{summary}

🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}

Shared via Owly News Summariser', 1),

       ('Markdown', 'markdown', '# {title}

{summary}

**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}

---
*Shared via Owly News Summariser*', 1),

       ('Simple Text', 'text', '{title}

{summary}

Source: {url}', 0),

       ('HTML Email', 'html', '<h2>{title}</h2>
<p>{summary}</p>
<p><strong>Tags:</strong> {tags}<br>
<strong>Location:</strong> {geographic_tags}<br>
<strong>Source:</strong> <a href="{url}">{url}</a><br>
<strong>Published:</strong> {published_at}</p>
<hr>
<small>Shared via Owly News Summariser</small>', 0);
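The `{placeholder}` syntax in these templates suggests plain string substitution. A sketch of a naive renderer — a hypothetical helper; the real rendering code is not part of this changeset:

use std::collections::HashMap;

// Hypothetical renderer: replaces each {key} occurrence with its value.
pub fn render_template(template: &str, values: &HashMap<&str, String>) -> String {
    let mut out = template.to_string();
    for (key, value) in values {
        out = out.replace(&format!("{{{}}}", key), value);
    }
    out
}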
@@ -8,11 +8,11 @@ MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
LLM_MODEL = os.getenv("LLM_MODEL", "mistral-nemo:12b")
LLM_MODEL = os.getenv("LLM_MODEL", "gemma2:9b")
LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 10_000))
MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 40_000))

frontend_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
@@ -21,7 +21,7 @@ frontend_path = os.path.join(
)

logging.basicConfig(
    level=logging.WARNING,
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@@ -150,8 +150,6 @@ async def get_news(
            where_conditions.append("published BETWEEN ? AND ?")
            params.extend([from_ts, to_ts])

            logger.info(f"Date range: {from_date} to {to_date} (UTC timestamps: {from_ts} to {to_ts})")

        # Build the complete SQL query
        base_sql = """
            SELECT id, title, summary, url, published, country, created_at
@@ -163,27 +161,13 @@ async def get_news(
        else:
            sql = base_sql

        sql += " ORDER BY published DESC LIMIT 1000"

        # Log query info
        if all_countries and all_dates:
            logger.info("Querying ALL news articles (no filters)")
        elif all_countries:
            logger.info(f"Querying news from ALL countries with date filter")
        elif all_dates:
            logger.info(f"Querying ALL dates for countries: {country}")
        else:
            logger.info(f"Querying news: countries={country}, timezone={timezone_name}")

        logger.info(f"SQL: {sql}")
        logger.info(f"Parameters: {params}")
        sql += " ORDER BY published DESC"

        # Execute the query
        db.execute(sql, params)
        rows = db.fetchall()
        result = [dict(row) for row in rows]

        logger.info(f"Found {len(result)} news articles")
        return result

    except ValueError as e:
@@ -119,12 +119,11 @@ class NewsFetcher:
        except Exception as e:
            logger.warning(
                f"❌ Error fetching article content from {url}: {
                type(e).__name__}: {e}")
                    type(e).__name__}: {e}")
            return ""

    @staticmethod
    def build_prompt(
            url: str,
            title: str = "",
            summary: str = "",
            content: str = "") -> str:
@@ -132,14 +131,13 @@ class NewsFetcher:
        Generate a prompt for the LLM to summarize an article.

        Args:
            url: Public URL of the article to summarize
            title: Article title from RSS feed (optional)
            summary: Article summary from RSS feed (optional)
            content: Extracted article content (optional)

        Returns:
            A formatted prompt string that instructs the LLM to generate
            a JSON response with title and summaries in German and English
            a JSON response with title, summary and tags in German
        """
        context_info = []
        if title:
@@ -148,28 +146,43 @@ class NewsFetcher:
            context_info.append(f"RSS-Beschreibung: {summary}")
        if content:
            content_preview = content[:500] + \
                "..." if len(content) > 500 else content
                              "..." if len(content) > 500 else content
            context_info.append(f"Artikel-Inhalt: {content_preview}")

        context = "\n".join(
            context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."

        return (
            "### Aufgabe\n"
            f"Du sollst eine Nachricht basierend auf der URL und den verfügbaren Informationen zusammenfassen.\n"
            f"URL: {url}\n"
            f"Verfügbare Informationen:\n{context}\n\n"
            "### Vorliegende Informationen\n"
            f"{context}\n\n"
            "### Längenbegrenzungen\n"
            "title: Format \"ORT: Titel\", max 100 Zeichen\n"
            "location: nur der ORT-Teil, max 40 Zeichen\n"
            "summary: 100–160 Wörter\n"
            "tags: bis zu 6 Schlüsselwörter, durch Komma getrennt, alles Kleinbuchstaben.\n\n"
            "### Regeln\n"
            "1. Nutze VORRANGIG den Artikel-Inhalt falls verfügbar, ergänze mit RSS-Informationen\n"
            "2. Falls kein Artikel-Inhalt verfügbar ist, nutze RSS-Titel und -Beschreibung\n"
            "3. Falls keine ausreichenden Informationen vorliegen, erstelle eine plausible Zusammenfassung basierend auf der URL\n"
            "4. Gib ausschließlich **gültiges minifiziertes JSON** zurück – kein Markdown, keine Kommentare\n"
            "5. Struktur: {\"title\":\"…\",\"summary\":\"…\"}\n"
            "6. title: Aussagekräftiger deutscher Titel (max 100 Zeichen)\n"
            "7. summary: Deutsche Zusammenfassung (zwischen 100 und 160 Wörter)\n"
            "8. Kein Text vor oder nach dem JSON\n\n"
            "1. Nutze ausschließlich Informationen, die im bereitgestellten Material eindeutig vorkommen. Externes Wissen ist untersagt.\n"
            "2. Liegt sowohl Artikel-Text als auch RSS-Metadaten vor, hat der Artikel-Text Vorrang; verwende RSS nur ergänzend.\n"
            "3. Liegt nur RSS-Titel und/oder -Beschreibung vor, stütze dich ausschließlich darauf.\n"
            "4. Sind die Informationen unzureichend, gib exakt {\"location\":\"\",\"title\":\"\",\"summary\":\"\",\"tags\":\"\"} zurück.\n"
            "5. Gib nur gültiges, minifiziertes JSON zurück – keine Zeilenumbrüche, kein Markdown, keine Kommentare.\n"
            "6. Verwende keine hypothetischen Formulierungen (\"könnte\", \"möglicherweise\" etc.).\n"
            "7. Wörtliche Zitate dürfen höchstens 15 % des Summary-Texts ausmachen.\n"
            "8. Kein Text vor oder nach dem JSON.\n\n"
            "### Ausgabe\n"
            "Jetzt antworte mit dem JSON:"
            "Antworte jetzt ausschließlich mit dem JSON:\n"
        )

    @staticmethod
    def build_system_prompt():
        return (
            "Du bist ein hochpräziser JSON-Summarizer und Experte für die Zusammenfassung von Artikeln.\n\n"
            "### Vorgehen\n"
            "Schritt 1: Identifiziere Hauptthema und Zweck.\n"
            "Schritt 2: Extrahiere die wichtigsten Fakten und Ergebnisse.\n"
            "Schritt 3: Erkenne die zentralen Argumente und Standpunkte.\n"
            "Schritt 4: Ordne die Informationen nach Wichtigkeit.\n"
            "Schritt 5: Erstelle eine prägnante, klare und sachliche Zusammenfassung.\n\n"
        )

    @staticmethod
@@ -193,26 +206,55 @@ class NewsFetcher:
            A dictionary containing the article title and summaries in German and English,
            or None if summarization failed
        """
        logger.debug("[AI] Fetching article content from: " + url)

        article_content = await NewsFetcher.fetch_article_content(client, url)

        if not article_content:
            logger.warning(
                f"⚠️ Could not fetch article content, using RSS data only")

        prompt = NewsFetcher.build_prompt(
            url, title, summary, article_content)
        prompt = NewsFetcher.build_prompt(title, summary, article_content)
        system_prompt = NewsFetcher.build_system_prompt()
        payload = {
            "model": LLM_MODEL,
            "prompt": prompt,
            "system": system_prompt,
            "stream": False,
            "temperature": 0.1,
            "format": "json",
            "format": {
                "type": "object",
                "properties": {
                    "title": {
                        "type": "string"
                    },
                    "location": {
                        "type": "string"
                    },
                    "summary": {
                        "type": "string"
                    },
                    "tags": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        }
                    }
                },
                "required": [
                    "title",
                    "summary",
                    "tags"
                ]
            },
            "options": {
                "num_gpu": 1,  # Force GPU usage
                "num_ctx": 128_000,  # Context size
                "num_ctx": 8192,  # Context size
            }
        }

        logger.debug("[AI] Running summary generation...")

        try:
            response = await client.post(
                f"{OLLAMA_HOST}/api/generate",
@@ -224,6 +266,8 @@ class NewsFetcher:
            result = response.json()
            llm_response = result["response"]

            logger.debug("[AI] " + llm_response)

            if isinstance(llm_response, str):
                summary_data = json.loads(llm_response)
            else:
@@ -263,7 +307,7 @@ class NewsFetcher:
        except Exception as e:
            logger.error(
                f"❌ Unexpected error summarizing {url}: {
                type(e).__name__}: {e}")
                    type(e).__name__}: {e}")
            return None

    @staticmethod
@@ -300,7 +344,7 @@ class NewsFetcher:
        except Exception as e:
            logger.error(
                f"❌ Critical error during harvest: {
                type(e).__name__}: {e}")
                    type(e).__name__}: {e}")
            raise

    @staticmethod
@@ -327,18 +371,18 @@ class NewsFetcher:
        if hasattr(feed_data, 'bozo') and feed_data.bozo:
            logger.warning(
                f"⚠️ Feed has parsing issues: {
                feed_row['url']}")
                    feed_row['url']}")
            if hasattr(feed_data, 'bozo_exception'):
                logger.warning(
                    f"⚠️ Feed exception: {
                    feed_data.bozo_exception}")
                        feed_data.bozo_exception}")

        total_entries = len(feed_data.entries)

        if total_entries == 0:
            logger.warning(
                f"⚠️ No entries found in feed: {
                feed_row['url']}")
                    feed_row['url']}")
            return stats

        for i, entry in enumerate(feed_data.entries, 1):
@@ -388,8 +432,6 @@ class NewsFetcher:
                    summary=rss_summary
                )

                logger.info(summary)

                if not summary:
                    logger.warning(
                        f"❌ Failed to get summary for article {i}: {article_url}")
@@ -403,8 +445,9 @@ class NewsFetcher:
                cursor.execute(
                    """
                    INSERT
                    OR IGNORE INTO news
                        (title, summary, url, published, country)
                    OR IGNORE
                    INTO news
                        (title, summary, url, published, country)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (
@@ -429,7 +472,7 @@ class NewsFetcher:
        except Exception as e:
            logger.error(
                f"❌ Error processing feed {
                feed_row['url']}: {
                type(e).__name__}: {e}")
                    feed_row['url']}: {
                    type(e).__name__}: {e}")

        return stats
@@ -11,10 +11,12 @@ MIN_CRON_HOURS=0.5
SYNC_COOLDOWN_MINUTES=30

# LLM model to use for summarization
LLM_MODEL=qwen2:7b-instruct-q4_K_M
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
LLM_MODEL=mistral-nemo:12b
LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S
LLM_MODEL=qwen2:7b-instruct-q4_K_M                # ca. 7-9 GB (typically 8 GB)
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0       # ca. 6-8 GB (long context)
LLM_MODEL=mistral-nemo:12b                        # ca. 16-24+ GB
LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S  # ca. 22 GB
LLM_MODEL=yarn-mistral:7b-64k-q4_K_M              # ca. 11 GB
LLM_MODEL=gemma2:9b                               # ca. 8 GB

# Timeout in seconds for LLM requests
LLM_TIMEOUT_SECONDS=180
Binary file not shown.
@@ -1,8 +0,0 @@
import { defineConfig } from 'cypress'

export default defineConfig({
  e2e: {
    specPattern: 'cypress/e2e/**/*.{cy,spec}.{js,jsx,ts,tsx}',
    baseUrl: 'http://localhost:4173',
  },
})
@@ -1,5 +1,5 @@
{
  "name": "owly-news-summariser",
  "name": "owly-news",
  "version": "0.0.1",
  "private": true,
  "license": "PolyForm-Noncommercial-1.0.0",
@@ -14,75 +14,77 @@

        <!-- Articles Grid -->
        <div v-else class="grid gap-4 sm:gap-6 md:grid-cols-2 xl:grid-cols-3">
            <article
                v-for="article in news.articles"
                :key="article.id"
                class="flex flex-col h-full bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-200 dark:border-gray-700 hover:shadow-md dark:hover:shadow-lg dark:hover:shadow-gray-800/50 transition-all duration-200 overflow-hidden group"
            >
                <!-- Article Header -->
                <div class="flex-1 p-4 sm:p-6">
                    <div class="flex items-start justify-between mb-3">
            <template v-for="article in news.articles"
                      :key="article.id">
                <article
                    v-if="isValidArticleContent(article)"
                    class="flex flex-col h-full bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-200 dark:border-gray-700 hover:shadow-md dark:hover:shadow-lg dark:hover:shadow-gray-800/50 transition-all duration-200 overflow-hidden group"
                >
                    <!-- Article Header -->
                    <div class="flex-1 p-4 sm:p-6">
                        <div class="flex items-start justify-between mb-3">
                        <span
                            class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-200">
                                class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-200">
                            {{ article.country }}
                        </span>
                            <time
                                :datetime="new Date(article.published * 1000).toISOString()"
                                :title="new Date(article.published * 1000).toLocaleString(userLocale.value, {
                            <time
                                :datetime="new Date(article.published * 1000).toISOString()"
                                :title="new Date(article.published * 1000).toLocaleString(userLocale, {
                                    dateStyle: 'full',
                                    timeStyle: 'long'
                                })"
                                class="text-xs text-gray-500 flex-shrink-0 ml-2 cursor-help hover:text-green-600 dark:hover:text-green-400 transition-colors relative group"
                            >
                                {{ formatDate(article.published) }}
                            </time>
                                class="text-xs text-gray-500 flex-shrink-0 ml-2 cursor-help hover:text-green-600 dark:hover:text-green-400 transition-colors relative group"
                            >
                                {{ formatDate(article.published) }}
                            </time>
                        </div>

                        <!-- Title -->
                        <h3
                            class="text-base sm:text-lg font-semibold text-gray-900 dark:text-white mb-3 line-clamp-2 group-hover:text-green-600 dark:group-hover:text-green-400 transition-colors">
                            <a :href="article.url" target="_blank" rel="noopener noreferrer">
                                {{ article.title }}
                            </a>
                        </h3>

                        <!-- Summary -->
                        <p
                            class="text-sm sm:text-base text-gray-700 dark:text-gray-300 line-clamp-5 leading-relaxed">
                            {{ article.summary }}
                        </p>
                    </div>

                <!-- Title -->
                <h3
                    class="text-base sm:text-lg font-semibold text-gray-900 dark:text-white mb-3 line-clamp-2 group-hover:text-green-600 dark:group-hover:text-green-400 transition-colors">
                    <a :href="article.url" target="_blank" rel="noopener noreferrer">
                        {{ article.title }}
                    <!-- Article Footer -->
                    <div
                        class="flex justify-between items-center gap-4 p-4 sm:p-6">
                        <button
                            @click="openModal(article)"
                            class="flex-1 inline-flex items-center justify-center cursor-pointer px-4 py-2 text-sm font-medium rounded-lg bg-green-100 text-green-700 hover:bg-green-200 dark:bg-green-900/30 dark:text-green-400 dark:hover:bg-green-900/50 transition-colors"
                        >
                            Full summary
                            <svg class="ml-1 w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
                                      d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"/>
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
                                      d="M2.458 12C3.732 7.943 7.523 5 12 5c4.478 0 8.268 2.943 9.542 7-1.274 4.057-5.064 7-9.542 7-4.477 0-8.268-2.943-9.542-7z"/>
                            </svg>
                        </button>

                        <a
                            :href="article.url"
                            target="_blank"
                            rel="noopener noreferrer"
                            class="flex-1 inline-flex items-center justify-center px-4 py-2 text-sm font-medium rounded-lg bg-green-100 text-green-700 hover:bg-green-200 dark:bg-green-900/30 dark:text-green-400 dark:hover:bg-green-900/50 transition-colors"
                        >
                            Full article
                            <svg class="ml-1 w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
                                      d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14"/>
                            </svg>
                        </a>
                </h3>

                <!-- Summary -->
                <p
                    class="text-sm sm:text-base text-gray-700 dark:text-gray-300 line-clamp-5 leading-relaxed">
                    {{ article.summary }}
                </p>
            </div>

            <!-- Article Footer -->
            <div
                class="flex justify-between items-center gap-4 p-4 sm:p-6">
                <button
                    @click="openModal(article)"
                    class="flex-1 inline-flex items-center justify-center cursor-pointer px-4 py-2 text-sm font-medium rounded-lg bg-green-100 text-green-700 hover:bg-green-200 dark:bg-green-900/30 dark:text-green-400 dark:hover:bg-green-900/50 transition-colors"
                >
                    Full summary
                    <svg class="ml-1 w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
                              d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"/>
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
                              d="M2.458 12C3.732 7.943 7.523 5 12 5c4.478 0 8.268 2.943 9.542 7-1.274 4.057-5.064 7-9.542 7-4.477 0-8.268-2.943-9.542-7z"/>
                    </svg>
                </button>

                <a
                    :href="article.url"
                    target="_blank"
                    rel="noopener noreferrer"
                    class="flex-1 inline-flex items-center justify-center px-4 py-2 text-sm font-medium rounded-lg bg-green-100 text-green-700 hover:bg-green-200 dark:bg-green-900/30 dark:text-green-400 dark:hover:bg-green-900/50 transition-colors"
                >
                    Full article
                    <svg class="ml-1 w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
                              d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14"/>
                    </svg>
                </a>
                    </div>
                </article>
            </div>
        </article>
            </template>
        </div>

        <!-- Loading State & Load More Trigger -->
@@ -100,9 +102,9 @@

        <!-- Article Modal -->
        <ArticleModal
                :is-open="isModalOpen"
                :article="selectedArticle"
                @close="closeModal"
            :is-open="isModalOpen"
            :article="selectedArticle"
            @close="closeModal"
        />
    </div>
</template>
@@ -129,17 +131,48 @@ const loadMoreArticles = async () => {
    }
};

interface Article {
    id: number;
    title: string;
    summary: string;
    url: string;
    published: number;
    country: string;
    created_at: number;
}

const INVALID_MARKERS = ['---', '...', '…', 'Title', 'Summary', 'Titel', 'Zusammenfassung'] as const;
const REQUIRED_TEXT_FIELDS = ['title', 'summary', 'url'] as const;

const isValidArticleContent = (article: Article): boolean => {
    const hasEmptyRequiredFields = REQUIRED_TEXT_FIELDS.some(
        field => article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].length === 0
    );

    if (hasEmptyRequiredFields) {
        return false;
    }

    const hasInvalidMarkers = REQUIRED_TEXT_FIELDS.some(field =>
        INVALID_MARKERS.some(marker =>
            article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].includes(marker)
        )
    );

    return !hasInvalidMarkers;
};

const observer = ref<IntersectionObserver | null>(null);
const loadMoreTrigger = ref<HTMLElement | null>(null);

onMounted(() => {
    observer.value = new IntersectionObserver(
            (entries) => {
                if (entries[0].isIntersecting) {
                    loadMoreArticles();
                }
            },
            {threshold: 0.5}
        (entries) => {
            if (entries[0].isIntersecting) {
                loadMoreArticles();
            }
        },
        {threshold: 0.5}
    );

    if (loadMoreTrigger.value) {
@@ -6470,9 +6470,9 @@ __metadata:
  languageName: node
  linkType: hard

"owly-news-summariser@workspace:.":
"owly-news@workspace:.":
  version: 0.0.0-use.local
  resolution: "owly-news-summariser@workspace:."
  resolution: "owly-news@workspace:."
  dependencies:
    "@tailwindcss/vite": "npm:^4.1.11"
    "@tsconfig/node22": "npm:^22.0.2"