Compare commits

35 Commits: main...16167d18ff

SHA1 (author and date columns not shown):
16167d18ff
7c6724800f
af304266a4
815e3b22fd
e8e61faf61
c19813cbe2
cf163082b2
011b256662
0a97a57c76
338b3ac7c1
13fbac5009
9b805e891a
78073d27d7
c3b0c87bfa
0aa8d9fa3a
cbbd0948e6
3a5b0d8f4b
0ce916c654
f853213d15
300845c655
d90c618ee3
e7a97206a9
c2adfa711d
b2d82892ef
0f1632ad65
7b114a6145
4edb2b2179
aa520efb82
f22259b863
a30f8467bc
79e4d7f1de
37ebf45d82
bc1735448a
59b19a22ff
86b5f83140
.gitignore (vendored, 3 changes)

@@ -34,10 +34,11 @@ build/
 logs/
 *.log
 
-# Database files
+# Database files (now includes the specific dev database)
 *.sqlite
 *.sqlite3
 *.db
+owlynews.sqlite3*
 
 # Dependency directories
 node_modules/
README.md (236 changes)

@@ -1,105 +1,67 @@
# Owly News Summariser
# Owly News

Owly News Summariser is a web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.
Owly News is a modern web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.

## Features

- Fetches news from configurable RSS feeds
- Automatically summarizes articles using Ollama LLM
- Filters news by country
- **AI-powered intelligent tagging** with geographic, category, and source tags
- **Advanced multi-criteria filtering** with hierarchical tag support
- Progressive Web App (PWA) support for offline access
- Scheduled background updates
- High-performance Rust backend for optimal resource usage
- Modern Vue.js frontend with TypeScript support
- **Comprehensive analytics** and reading statistics
- **Flexible sharing system** with multiple format options

## Project Structure

The project consists of two main components:
The project consists of multiple components:

- **Backend**: A FastAPI application that fetches and processes news feeds, summarizes articles, and provides API endpoints
- **Frontend**: A Vue.js application that displays the news and provides a user interface for managing feeds
- **Backend (Rust)**: Primary backend written in Rust using the Axum framework for high performance (`backend-rust/`)
- **Backend (Python)**: Legacy FastAPI backend (`backend/`)
- **Frontend**: Modern Vue.js 3 application with TypeScript and Tailwind CSS (`frontend/`)

## Prerequisites

- Python 3.8+ for the backend
- Node.js 16+ and Yarn for the frontend
### For Rust Backend (Recommended)
- Rust 1.88.0+
- [Ollama](https://ollama.ai/) for article summarization and tagging
- SQLite (handled automatically by SQLx)

### For Python Backend (Legacy)
- Python 3.8+
- [Ollama](https://ollama.ai/) for article summarization

## Installing Yarn

Yarn is a package manager for JavaScript that's required for the frontend. Here's how to install it:

### Using npm (recommended)

If you already have Node.js installed, the easiest way to install Yarn is via npm:

```bash
npm install -g yarn
```

### Platform-specific installations

#### Windows

- **Using Chocolatey**: `choco install yarn`
- **Using Scoop**: `scoop install yarn`
- **Manual installation**: Download and run the [installer](https://classic.yarnpkg.com/latest.msi)

#### macOS

- **Using Homebrew**: `brew install yarn`
- **Using MacPorts**: `sudo port install yarn`

#### Linux

- **Debian/Ubuntu**:
  ```bash
  curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
  sudo apt update && sudo apt install yarn
  ```

- **CentOS/Fedora/RHEL**:
  ```bash
  curl --silent --location https://dl.yarnpkg.com/rpm/yarn.repo | sudo tee /etc/yum.repos.d/yarn.repo
  sudo yum install yarn
  ```

- **Arch Linux**: `pacman -S yarn`

After installation, verify Yarn is installed correctly:

```bash
yarn --version
```
### For Frontend
- Node.js 22+ and npm
- Modern web browser with PWA support

## Setup

### Backend Setup
### Rust Backend Setup (Recommended)

1. Navigate to the backend directory:
1. Navigate to the Rust backend directory:
   ```bash
   cd backend
   cd backend-rust
   ```

2. Create a virtual environment:
   ```bash
   python -m venv venv
   source venv/bin/activate  # On Windows: venv\Scripts\activate
   ```

3. Install dependencies:
   ```bash
   pip install -r requirements.txt
   ```

4. Create a `.env` file based on the example:
2. Create a `.env` file based on the example:
   ```bash
   cp example.env .env
   ```

5. Customize the `.env` file as needed:
   - `OLLAMA_HOST`: URL for the Ollama service (default: http://localhost:11434)
   - `CRON_HOURS`: Interval for scheduled news fetching (default: 1)
3. Customize the `.env` file as needed:
   - `DATABASE_URL`: SQLite database connection string
   - `OLLAMA_BASE_URL`: URL for the Ollama service (default: http://localhost:11434)
   - Other configuration options as documented in the example file

4. Run database migrations:
   ```bash
   cargo install sqlx-cli
   sqlx migrate run
   ```

### Frontend Setup

@@ -110,29 +72,24 @@ yarn --version

2. Install dependencies:
   ```bash
   yarn
   npm install
   ```

## Running the Application

### Running the Backend
### Running the Rust Backend

1. Navigate to the backend directory:
1. Navigate to the Rust backend directory:
   ```bash
   cd backend
   cd backend-rust
   ```

2. Activate the virtual environment:
2. Start the backend server:
   ```bash
   source venv/bin/activate  # On Windows: venv\Scripts\activate
   cargo run
   ```

3. Start the backend server:
   ```bash
   uvicorn app.main:app --reload
   ```

The backend will be available at http://localhost:8000
The backend will be available at http://localhost:3000

### Running the Frontend

@@ -143,22 +100,53 @@ yarn --version

2. Start the development server:
   ```bash
   yarn dev:watch
   npm run dev
   ```

The frontend will be available at http://localhost:5173

## Key Features

### Intelligent Content Organization
- **AI-Powered Tagging**: Automatic classification with geographic, topical, and source tags
- **Hierarchical Filtering**: Multi-level filtering by location (country → region → city), categories, and content types
- **Smart Search**: Advanced filtering with suggestions based on tag relationships and usage patterns
- **Legacy Migration**: Seamless upgrade from simple country-based filtering to a comprehensive tag-based system

### Advanced Analytics
- **Reading Statistics**: Track reading time, completion rates, and engagement patterns
- **Content Analytics**: Source performance, tag usage, and trending topics analysis
- **Geographic Insights**: Location-based content distribution and reading preferences
- **Goal Tracking**: Personal reading goals with progress monitoring

### Flexible Article Display
- **Compact View**: Title, excerpt, tags, and action buttons for quick browsing
- **On-Demand Loading**: Full content, AI summaries, and source links as needed
- **Visual Tag System**: Color-coded, hierarchical tags with click-to-filter functionality
- **Reading Status**: Visual indicators for read/unread status and progress tracking

### Enhanced Sharing
- **Multiple Formats**: Text, Markdown, HTML, and JSON export options
- **Custom Templates**: User-configurable sharing formats
- **One-Click Operations**: Copy to clipboard with formatted content
- **Privacy Controls**: Configurable information inclusion in shared content

## Building for Production

### Building the Backend

The backend can be deployed as a standard FastAPI application. You can use tools like Gunicorn with Uvicorn workers:
### Building the Rust Backend

1. Navigate to the Rust backend directory:
   ```bash
   pip install gunicorn
   gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker
   cd backend-rust
   ```

2. Build the optimized release binary:
   ```bash
   cargo build --release
   ```

The binary will be available at `target/release/owly-news`

### Building the Frontend

1. Navigate to the frontend directory:
@@ -168,32 +156,62 @@ gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker

2. Build the frontend:
   ```bash
   yarn build
   npm run build
   ```

The built files will be in the `dist` directory and can be served by any static file server.

## API Endpoints
## Development

The backend provides the following API endpoints:
### Code Quality

- `GET /news`: Get news articles with optional filtering
- `GET /meta/last_sync`: Get the timestamp of the last feed synchronization
- `POST /meta/cron`: Set the schedule for automatic feed synchronization
- `GET /meta/feeds`: List all configured feeds
- `POST /meta/feeds`: Add a new feed
- `DELETE /meta/feeds`: Delete a feed
- `GET /meta/model`: Check the status of the LLM model
- `POST /meta/sync`: Manually trigger a feed synchronization
The project includes comprehensive tooling for code quality:

## Environment Variables
**Frontend:**
- ESLint with Vue and TypeScript support
- Prettier for code formatting
- Vitest for testing
- TypeScript for type safety
- Oxlint for additional linting

### Backend
**Backend (Rust):**
- Standard Rust tooling (`cargo fmt`, `cargo clippy`)
- SQLx for compile-time checked SQL queries

- `OLLAMA_HOST`: URL for the Ollama service
- `CRON_HOURS`: Interval for scheduled news fetching in hours
- `DATABASE_URL`: SQLite database connection string
### Testing

## License
Run frontend tests:
```bash
cd frontend
npm run test
```

Code is licensed under [PolyForm Noncommercial 1.0.0](https://polyformproject.org/licenses/noncommercial/1.0.0/). For any commercial use, please get in touch.
## Configuration

The application uses a comprehensive configuration system via `config.toml`:

- **AI Settings**: Configure Ollama integration for summaries and tagging
- **Display Preferences**: Default views, themes, and UI customization
- **Analytics**: Control data collection and retention policies
- **Filtering**: Smart suggestions, saved filters, and geographic hierarchy
- **Sharing**: Default formats and custom templates

See the example configuration in the project for detailed options.

## Migration from Legacy Systems

The application includes automatic migration tools for upgrading from simpler filtering systems:

- **Country Filter Migration**: Automatic conversion to hierarchical geographic tags
- **Data Preservation**: Maintains historical data during migration
- **Backward Compatibility**: Gradual transition with user control
- **Validation Tools**: Ensure data integrity throughout the migration process

## Future Roadmap

The project is evolving through three phases:
1. **Phase 1**: High-performance Rust backend with advanced filtering and analytics
2. **Phase 2**: CLI application for power users and automation
3. **Phase 3**: Migration to Dioxus for a full Rust stack

See `ROADMAP.md` for detailed development plans and architectural decisions.
backend-rust/.gitignore (vendored, new file, 2 lines)

@@ -0,0 +1,2 @@
target/
/config.toml
backend-rust/Cargo.lock (generated, new file, 3581 lines)

File diff suppressed because it is too large.
backend-rust/Cargo.toml (new file, 38 lines)

@@ -0,0 +1,38 @@
[workspace]
members = [
    "crates/app",
    "crates/api",
    "crates/module-api",
    "crates/module-host",
    "crates/modules/summarizer",
]
resolver = "2"

[workspace.package]
edition = "2024"
version = "0.1.0"

[workspace.dependencies]
anyhow = "1.0"
tokio = "1"
axum = "0.8.4"
serde = "1.0"
serde_json = "1.0"
sqlx = "0.8"
dotenv = "0.15"
tracing = "0.1"
tracing-subscriber = "0.3"
toml = "0.9.5"
unicode-segmentation = "1.12.0"
sha2 = "0.10.9"
hex = "0.4.3"
readability = "0.3.0"
scraper = "0.23.1"
libloading = "0.8"
async-trait = "0.1"
once_cell = "1.19"
num_cpus = "1.16"

# Dev-only deps centralized (optional)
tokio-test = "0.4"
axum-test = "17.3"
backend-rust/ROADMAP.md (new file, 584 lines)

@@ -0,0 +1,584 @@
# Owly News Summariser - Project Roadmap

This document outlines the strategic approach for transforming the project through three phases: Python-to-Rust backend migration, CLI application addition, and Vue-to-Dioxus frontend migration.

## Project Structure Strategy

### Current Phase: Axum API Setup
```
owly-news-summariser/
├── src/
│   ├── main.rs                   # Entry point (will evolve)
│   ├── db.rs                     # Database connection & SQLx setup
│   ├── api.rs                    # API module declaration
│   ├── api/                      # API-specific modules (no mod.rs needed)
│   │   ├── routes.rs             # Route definitions
│   │   ├── middleware.rs         # Custom middleware
│   │   └── handlers.rs           # Request handlers & business logic
│   ├── models.rs                 # Models module declaration
│   ├── models/                   # Data models & database entities
│   │   ├── user.rs
│   │   ├── article.rs
│   │   ├── summary.rs
│   │   ├── tag.rs                # Tag models and relationships
│   │   ├── analytics.rs          # Analytics and statistics models
│   │   └── settings.rs           # User settings and preferences
│   ├── services.rs               # Services module declaration
│   ├── services/                 # Business logic layer
│   │   ├── news_service.rs
│   │   ├── summary_service.rs
│   │   ├── scraping_service.rs   # Article content extraction
│   │   ├── tagging_service.rs    # AI-powered tagging
│   │   ├── analytics_service.rs  # Reading stats and analytics
│   │   └── sharing_service.rs    # Article sharing functionality
│   └── config.rs                 # Configuration management
├── migrations/                   # SQLx migrations (managed by SQLx CLI)
├── frontend/                     # Keep existing Vue frontend for now
├── config.toml                   # Configuration file with AI settings
└── Cargo.toml
```

### Phase 2: Multi-Binary Structure (API + CLI)
```
owly-news-summariser/
├── src/
│   ├── lib.rs                    # Shared library code
│   ├── bin/
│   │   ├── server.rs             # API server binary
│   │   └── cli.rs                # CLI application binary
│   ├── [same module structure as Phase 1]
├── migrations/
├── frontend/
├── completions/                  # Shell completion scripts
│   ├── owly.bash
│   ├── owly.zsh
│   └── owly.fish
├── config.toml
└── Cargo.toml                    # Updated for multiple binaries
```

### Phase 3: Full Rust Stack
```
owly-news-summariser/
├── src/
│   ├── [same structure as Phase 2]
├── migrations/
├── frontend-dioxus/              # New Dioxus frontend
├── frontend/                     # Legacy Vue (to be removed)
├── completions/
├── config.toml
└── Cargo.toml
```

## Core Features & Architecture

### Article Processing & Display Workflow
**Hybrid Approach: RSS Feeds + Manual Submissions with Smart Content Management**

1. **Article Collection**
   - RSS feed monitoring and batch processing
   - Manual article URL submission
   - Store original content and metadata in database

2. **Content Processing Pipeline**
   - Fetch RSS articles → scrape full content → store in DB
   - **Compact Article Display**:
     - Title (primary display)
     - RSS description text
     - Tags (visual indicators)
     - Time posted (from RSS)
     - Time added (when added to system)
     - Action buttons: [Full Article] [Summary] [Source]
   - **On-Demand Content Loading**:
     - Full Article: Display complete scraped content
     - Summary: Show AI-generated summary
     - Source: Open original URL in new tab
   - Background async processing with status updates
   - Support for re-processing without re-fetching

3. **Intelligent Tagging System**
   - **Automatic Tag Generation**: AI analyzes content and assigns relevant tags
   - **Geographic & Source Tags**: AI-generated location tags (countries, regions, cities) and publication source tags
   - **Content Category Tags**: Technology, Politics, Business, Sports, Health, etc.
   - **Visual Tag Display**: Color-coded tags in compact article view with hierarchical display
   - **Tag Filtering**: Quick filtering by clicking tags with smart suggestions
   - **Custom Tags**: User-defined tags and categories
   - **Tag Confidence**: Visual indicators for AI vs manual tags
   - **Tag Migration**: Automatic conversion of legacy country filters to geographic tags

4. **Analytics & Statistics System**
   - **Reading Analytics**:
     - Articles read vs added
     - Reading time tracking
     - Most read categories and tags
     - Reading patterns over time
   - **Content Analytics**:
     - Source reliability and quality metrics
     - Tag usage statistics
     - Processing success rates
     - Content freshness tracking
   - **Performance Metrics**:
     - AI processing times
     - Scraping success rates
     - User engagement patterns

5. **Advanced Filtering System**
   - **Multi-Criteria Filtering**:
     - By tags (single or multiple with AND/OR logic)
     - By geographic tags (country, region, city with hierarchical filtering)
     - By content categories and topics
     - By date ranges (posted, added, read)
     - By processing status (pending, completed, failed)
     - By content availability (scraped, summary, RSS-only)
     - By read/unread status
   - **Smart Filter Migration**: Automatic conversion of legacy country filters to tag-based equivalents
   - **Saved Filter Presets**:
     - Custom filter combinations
     - Quick access to frequent searches
     - Geographic preset templates (e.g., "European Tech News", "US Politics")
   - **Smart Suggestions**: Filter suggestions based on usage patterns and tag relationships

6. **Settings & Management System**
   - **User Preferences**:
     - Default article view mode
     - Tag display preferences with geographic hierarchy settings
     - Reading tracking settings
     - Notification preferences
   - **System Settings**:
     - AI configuration (via API and config file)
     - Processing settings
     - Display customization
     - Export preferences
   - **Content Management**:
     - Bulk operations (mark read, delete, retag)
     - Archive old articles
     - Export/import functionality
     - Legacy data migration tools

7. **Article Sharing System**
   - **Multiple Share Formats**:
     - Clean text format with title, summary, and source link
     - Markdown format for developers
     - Rich HTML format for email/web
     - JSON format for API integration
   - **Copy to Clipboard**: One-click formatted sharing
   - **Share Templates**: Customizable sharing formats
   - **Privacy Controls**: Control what information is included in shares

8. **Database Schema**
   ```
   Articles: id, title, url, source_type, rss_content, full_content,
             summary, processing_status, published_at, added_at, read_at,
             read_count, reading_time, ai_enabled, created_at, updated_at
   Tags: id, name, category, description, color, usage_count, parent_id, created_at
   ArticleTags: article_id, tag_id, confidence_score, ai_generated, created_at
   ReadingStats: user_id, article_id, read_at, reading_time, completion_rate
   FilterPresets: id, name, filter_criteria, user_id, created_at
   Settings: key, value, category, user_id, updated_at
   ShareTemplates: id, name, format, template_content, created_at
   LegacyMigration: old_filter_type, old_value, new_tag_ids, migrated_at
   ```

## Step-by-Step Process

### Phase 1: Axum API Implementation

**Step 1: Core Infrastructure Setup**
- Set up database connection pooling with SQLx
- **Enhanced Configuration System**:
  - Extend config.toml with comprehensive settings
  - AI provider configurations with separate summary/tagging settings
  - Display preferences and UI customization
  - Analytics and tracking preferences
  - Sharing templates and formats
  - Filter and search settings
  - Geographic tagging preferences
- Establish error handling patterns with `anyhow`
- Set up logging and analytics infrastructure

**Step 2: Data Layer**
- Design comprehensive database schema with analytics and settings support
- Create SQLx migrations for all tables including analytics and user preferences
- Implement hierarchical tag system with geographic and content categories
- Add legacy migration support for country filters
- Implement article models with reading tracking and statistics
- Add settings and preferences data layer
- Create analytics data models and aggregation queries
- Implement sharing templates and format management
- Use SQLx's compile-time checked queries (see the sketch below)
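A minimal sketch of such a compile-time checked query. The `articles` table, its columns, and the row type here are assumptions drawn from the schema sketch above, not the final migrations; `query_as!` also requires `DATABASE_URL` to point at a prepared database when the crate is built.

```rust
use sqlx::SqlitePool;

// Illustrative row type; the real columns are whatever the migrations define.
struct ArticleRow {
    id: i64,
    title: String,
    url: String,
}

// `query_as!` validates the SQL against the live schema at build time, so a
// typo in a column name fails compilation instead of failing a request.
async fn recent_articles(pool: &SqlitePool, limit: i64) -> anyhow::Result<Vec<ArticleRow>> {
    let rows = sqlx::query_as!(
        ArticleRow,
        "SELECT id, title, url FROM articles ORDER BY added_at DESC LIMIT ?",
        limit
    )
    .fetch_all(pool)
    .await?;
    Ok(rows)
}
```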
**Step 3: Enhanced Services Layer**
- **Content Processing Services**:
  - RSS feed fetching and parsing
  - Web scraping with quality tracking
  - AI services for summary and tagging
- **Enhanced Tagging Service**:
  - Geographic location detection and tagging
  - Content category classification
  - Hierarchical tag relationships
  - Legacy filter migration logic
- **Analytics Service**:
  - Reading statistics collection and aggregation
  - Content performance metrics
  - User behavior tracking
  - Trend analysis and insights
- **Settings Management Service**:
  - User preference handling
  - System configuration management
  - Real-time settings updates
- **Sharing Service**:
  - Multiple format generation
  - Template processing
  - Privacy-aware content filtering
- **Advanced Filtering Service**:
  - Complex query building with geographic hierarchy
  - Filter preset management
  - Search optimization
  - Legacy filter migration

**Step 4: Comprehensive API Layer** (a route-registration sketch follows this list)
- **Article Management Routes**:
  - `GET /api/articles` - List articles with compact display data
  - `POST /api/articles` - Submit manual article URL
  - `GET /api/articles/:id` - Get basic article info
  - `GET /api/articles/:id/full` - Get complete scraped content
  - `GET /api/articles/:id/summary` - Get AI summary
  - `POST /api/articles/:id/read` - Mark as read and track reading time
  - `POST /api/articles/:id/share` - Generate shareable content
- **Analytics Routes**:
  - `GET /api/analytics/dashboard` - Main analytics dashboard data
  - `GET /api/analytics/reading-stats` - Personal reading statistics
  - `GET /api/analytics/content-stats` - Content and source analytics
  - `GET /api/analytics/trends` - Trending topics and patterns
  - `GET /api/analytics/export` - Export analytics data
- **Enhanced Filtering & Search Routes**:
  - `GET /api/filters/presets` - Get saved filter presets
  - `POST /api/filters/presets` - Save new filter preset
  - `GET /api/search/suggestions` - Get search and filter suggestions
  - `POST /api/search` - Advanced search with multiple criteria
  - `POST /api/filters/migrate` - Migrate legacy country filters to tags
- **Settings Routes**:
  - `GET /api/settings` - Get all user settings
  - `PUT /api/settings` - Update user settings
  - `GET /api/settings/system` - Get system configuration
  - `PUT /api/settings/system` - Update system settings (admin)
- **Enhanced Tag Management Routes**:
  - `GET /api/tags` - List tags with usage statistics and hierarchy
  - `GET /api/tags/geographic` - Get geographic tag hierarchy
  - `GET /api/tags/trending` - Get trending tags
  - `POST /api/tags/:id/follow` - Follow/unfollow tag for notifications
  - `GET /api/tags/categories` - Get tag categories and relationships
- **Sharing Routes**:
  - `GET /api/share/templates` - Get sharing templates
  - `POST /api/share/templates` - Create custom sharing template
  - `POST /api/articles/:id/share/:format` - Generate share content
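As a sketch of how a couple of these endpoints could be registered with Axum. The handler bodies are placeholders, only the first two article routes are shown, and the function names are illustrative rather than committed API.

```rust
use axum::{routing::get, Json, Router};
use serde_json::{json, Value};

// Placeholder handlers; the real ones would call into the services layer.
async fn list_articles() -> Json<Value> {
    Json(json!({ "articles": [] }))
}

async fn submit_article(Json(body): Json<Value>) -> Json<Value> {
    Json(json!({ "accepted": body }))
}

// GET and POST on /api/articles, mirroring the route table above.
fn api_router() -> Router {
    Router::new().route("/api/articles", get(list_articles).post(submit_article))
}
```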
**Step 5: Enhanced Frontend Features**
- **Compact Article Display**:
  - Card-based layout with title, RSS excerpt, tags, and timestamps
  - Action buttons for Full Article, Summary, and Source
  - Hierarchical tag display with geographic and category indicators
  - Reading status and progress indicators
- **Advanced Analytics Dashboard**:
  - Reading statistics with charts and trends
  - Content source performance metrics
  - Tag usage and trending topics with geographic breakdowns
  - Personal reading insights and goals
- **Comprehensive Filtering Interface**:
  - Multi-criteria filter builder with geographic hierarchy
  - Saved filter presets with quick access
  - Smart filter suggestions based on tag relationships
  - Visual filter indicators and clear actions
  - Legacy filter migration interface
- **Settings Management Panel**:
  - User preference configuration
  - AI and processing settings
  - Display and UI customization
  - Export/import functionality
- **Enhanced Sharing System**:
  - Quick share buttons with format selection
  - Copy-to-clipboard functionality
  - Custom sharing templates
  - Preview before sharing

**Step 6: Integration & Testing**
- Test all API endpoints with comprehensive coverage
- Test analytics collection and aggregation
- Test enhanced filtering and search functionality
- Test legacy filter migration
- Validate settings persistence and real-time updates
- Test sharing functionality across different formats
- Performance testing with large datasets and hierarchical tags
- Deploy and monitor

### Phase 2: CLI Application Addition

**Step 1: Restructure for Multiple Binaries**
- Move API code to `src/bin/server.rs`
- Create `src/bin/cli.rs` for CLI application
- Keep shared logic in `src/lib.rs`
- Update Cargo.toml to support multiple binaries

**Step 2: Enhanced CLI with Analytics and Management** (a command-declaration sketch follows this list)
- **Core Commands**:
  - `owly list [--filters] [--format table|json|compact]` - List articles
  - `owly show <id> [--content|--summary]` - Display specific article
  - `owly read <id>` - Mark article as read and open in pager
  - `owly open <id>` - Open source URL in browser
- **Analytics Commands**:
  - `owly stats [--period day|week|month|year]` - Show reading statistics
  - `owly trends [--tags|--sources|--topics|--geo]` - Display trending content
  - `owly analytics export [--format csv|json]` - Export analytics data
- **Management Commands**:
  - `owly settings [--get key] [--set key=value]` - Manage settings
  - `owly filters [--list|--save name|--load name]` - Manage filter presets
  - `owly cleanup [--old|--unread|--failed]` - Clean up articles
  - `owly migrate [--from-country-filters]` - Migrate legacy data
- **Enhanced Filtering Commands**:
  - `owly filter [--tag] [--geo] [--category]` - Advanced filtering with geographic support
  - `owly tags [--list|--hierarchy|--geo]` - Tag management with geographic display
- **Sharing Commands**:
  - `owly share <id> [--format text|markdown|html]` - Generate share content
  - `owly export <id> [--template name] [--output file]` - Export article
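One way to declare this command surface is clap's derive API. clap is an assumption here (the roadmap does not commit to a CLI crate), and only two of the commands above are sketched:

```rust
use clap::{Parser, Subcommand};

#[derive(Parser)]
#[command(name = "owly", about = "Owly News CLI (sketch)")]
struct Cli {
    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand)]
enum Command {
    /// List articles, e.g. `owly list --format json`
    List {
        #[arg(long, default_value = "table")]
        format: String,
    },
    /// Show a specific article by id
    Show {
        id: i64,
        #[arg(long)]
        summary: bool,
    },
}

fn main() {
    let cli = Cli::parse();
    match cli.command {
        Command::List { format } => println!("listing articles as {format}"),
        Command::Show { id, summary } => println!("showing article {id} (summary: {summary})"),
    }
}
```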
**Step 3: Advanced CLI Features**
- Interactive filtering and search with geographic hierarchy
- Real-time analytics display with charts (using ASCII graphs)
- Bulk operations with progress indicators
- Settings management with validation
- Shell completion for all commands and parameters
- Legacy data migration tools

### Phase 3: Dioxus Frontend Migration

**Step 1: Component Architecture**
- **Core Display Components**:
  - `ArticleCard` - Compact article display with action buttons
  - `ArticleViewer` - Full article content display
  - `SummaryViewer` - AI summary display
  - `TagCloud` - Interactive tag display with geographic hierarchy
  - `GeographicMap` - Visual geographic filtering interface
- **Analytics Components**:
  - `AnalyticsDashboard` - Main analytics overview
  - `ReadingStats` - Personal reading statistics
  - `TrendChart` - Trending topics and patterns
  - `ContentMetrics` - Source and content analytics
  - `GeographicAnalytics` - Location-based content insights
- **Enhanced Filtering Components**:
  - `FilterBuilder` - Advanced filter creation interface with geographic support
  - `FilterPresets` - Saved filter management
  - `SearchBar` - Smart search with suggestions
  - `GeographicFilter` - Hierarchical location filtering
  - `MigrationTool` - Legacy filter migration interface
- **Settings Components**:
  - `SettingsPanel` - User preference management
  - `SystemConfig` - System-wide configuration
  - `ExportImport` - Data export/import functionality
- **Sharing Components**:
  - `ShareDialog` - Sharing interface with format options
  - `ShareTemplates` - Custom template management

**Step 2: Enhanced UX Features**
- **Smart Article Display**:
  - Lazy loading for performance
  - Infinite scroll with virtualization
  - Quick preview on hover
  - Keyboard navigation support
- **Advanced Analytics**:
  - Interactive charts and graphs with geographic data
  - Customizable dashboard widgets
  - Goal setting and progress tracking
  - Comparison and trend analysis
- **Intelligent Filtering**:
  - Auto-complete for filters with geographic suggestions
  - Visual filter builder with map integration
  - Filter combination suggestions based on tag relationships
  - Saved search notifications
- **Seamless Sharing**:
  - One-click sharing with clipboard integration
  - Live preview of shared content
  - Social media format optimization
  - Batch sharing capabilities

## Key Strategic Considerations

### 1. Performance & Scalability
- **Efficient Data Loading**: Lazy loading and pagination for large datasets
- **Optimized Queries**: Indexed database queries for filtering and analytics with hierarchical tag support
- **Caching Strategy**: Smart caching for frequently accessed content and tag hierarchies
- **Real-time Updates**: WebSocket integration for live analytics

### 2. User Experience Focus
- **Progressive Disclosure**: Show essential info first, details on demand
- **Responsive Design**: Optimized for mobile and desktop
- **Accessibility**: Full keyboard navigation and screen reader support
- **Customization**: User-configurable interface and behavior
- **Smooth Migration**: Seamless transition from country-based to tag-based filtering

### 3. Analytics & Insights
- **Privacy-First**: User control over data collection and retention
- **Actionable Insights**: Meaningful statistics that guide reading habits
- **Performance Metrics**: System health and efficiency tracking
- **Trend Analysis**: Pattern recognition for content and behavior with geographic context

### 4. Content Management
- **Flexible Display**: Multiple view modes for different use cases
- **Smart Organization**: AI-assisted content categorization with geographic awareness
- **Bulk Operations**: Efficient management of large article collections
- **Data Integrity**: Reliable content processing and error handling
- **Legacy Support**: Smooth migration from existing country-based filtering

## Enhanced Configuration File Structure

```toml
[server]
host = '127.0.0.1'
port = 8090

[display]
default_view = "compact" # compact, full, summary
articles_per_page = 50
show_reading_time = true
show_word_count = false
highlight_unread = true
theme = "auto" # light, dark, auto

[analytics]
enabled = true
track_reading_time = true
track_scroll_position = true
retention_days = 365 # How long to keep detailed analytics
aggregate_older_data = true

[filtering]
enable_smart_suggestions = true
max_recent_filters = 10
auto_save_filters = true
default_sort = "added_desc" # added_desc, published_desc, title_asc
enable_geographic_hierarchy = true
auto_migrate_country_filters = true

[sharing]
default_format = "text"
include_summary = true
include_tags = true
include_source = true
copy_to_clipboard = true

[sharing.templates.text]
format = """
📰 {title}

{summary}

🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}

Shared via Owly News Summariser
"""

[sharing.templates.markdown]
format = """
# {title}

{summary}

**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}

---
*Shared via Owly News Summariser*
"""

[ai]
enabled = true
provider = "ollama"
timeout_seconds = 120

[ai.summary]
enabled = true
temperature = 0.1
max_tokens = 1000

[ai.tagging]
enabled = true
temperature = 0.3
max_tokens = 200
max_tags_per_article = 10
min_confidence_threshold = 0.7
enable_geographic_tagging = true
enable_category_tagging = true
geographic_hierarchy_levels = 3 # country, region, city

[scraping]
timeout_seconds = 30
max_retries = 3
max_content_length = 50000
respect_robots_txt = true
rate_limit_delay_ms = 1000

[processing]
batch_size = 10
max_concurrent = 5
retry_attempts = 3
priority_manual = true
auto_mark_read_on_view = false

[migration]
auto_convert_country_filters = true
preserve_legacy_data = true
migration_batch_size = 100

[cli]
default_output = "table"
pager_command = "less"
show_progress = true
auto_confirm_bulk = false
show_geographic_hierarchy = true
```

## Migration Strategy for Country-Based Filtering

### Automatic Migration Process
1. **Data Analysis**: Scan existing country filter data and RSS feed origins
2. **Tag Generation**: Create geographic tags for each country with hierarchical structure
3. **Filter Conversion**: Convert country-based filters to tag-based equivalents
4. **User Notification**: Inform users about the migration and new capabilities
5. **Gradual Rollout**: Maintain backward compatibility during transition period

### Enhanced Geographic Features
- **Hierarchical Display**: Country → Region → City tag hierarchy
- **Visual Map Integration**: Interactive geographic filtering via map interface
- **Smart Suggestions**: Related location and content suggestions
- **Multi-Level Filtering**: Filter by specific cities, regions, or broader geographic areas
- **Source Intelligence**: AI detection of article geographic relevance beyond RSS origin

## Future Enhancements (Post Phase 3)

### Advanced Analytics
- **Machine Learning Insights**: Content recommendation based on reading patterns and geographic preferences
- **Predictive Analytics**: Trending topic prediction with geographic context
- **Behavioral Analysis**: Reading habit optimization suggestions
- **Comparative Analytics**: Benchmark against reading goals and regional averages

### Enhanced Content Management
- **Smart Collections**: AI-curated article collections with geographic themes
- **Reading Lists**: Planned reading with progress tracking
- **Content Relationships**: Related article suggestions with geographic relevance
- **Advanced Search**: Full-text search with relevance scoring and geographic weighting

### Social & Collaboration Features
- **Reading Groups**: Shared reading lists and discussions with geographic focus
- **Social Sharing**: Integration with social platforms
- **Collaborative Tagging**: Community-driven content organization
- **Reading Challenges**: Gamification of reading habits with geographic themes

### Integration & Extensibility
- **Browser Extension**: Seamless article saving and reading
- **Mobile Apps**: Native iOS/Android applications with location awareness
- **API Ecosystem**: Third-party integrations and plugins
- **Webhook System**: Real-time notifications and integrations with geographic filtering
backend-rust/TODO.md (new file, 72 lines)

@@ -0,0 +1,72 @@
## CPU and resource limiting
- Tokio worker threads
  - Decide thread policy:
    - Option A: set TOKIO_WORKER_THREADS in the environment for deployments.
    - Option B: build a custom runtime with tokio::runtime::Builder::new_multi_thread().worker_threads(n) (see the sketch below).
  - Document your default policy (e.g., 50% of physical cores).
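Option B could look like the following sketch: a plain `main` that builds the runtime itself. The half-of-physical-cores fallback is just the example policy suggested above, not a settled default.

```rust
fn main() -> anyhow::Result<()> {
    // Honor TOKIO_WORKER_THREADS if set, otherwise default to half the physical cores.
    let threads = std::env::var("TOKIO_WORKER_THREADS")
        .ok()
        .and_then(|v| v.parse::<usize>().ok())
        .unwrap_or_else(|| std::cmp::max(1, num_cpus::get_physical() / 2));

    let runtime = tokio::runtime::Builder::new_multi_thread()
        .worker_threads(threads)
        .enable_all()
        .build()?;

    runtime.block_on(async {
        // ... start the daemon here ...
        Ok(())
    })
}
```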
- Concurrency guard for CPU-heavy tasks (see the sketch below)
  - Create a global tokio::sync::Semaphore with N permits (N = allowed concurrent heavy tasks).
  - Acquire a permit before invoking heavy module operations; release automatically on drop.
  - Expose the semaphore in app state so handlers/jobs can share it.
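A sketch of that guard; the permit count of 2 is a placeholder and would come from configuration in practice:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

#[derive(Clone)]
struct AppState {
    // Shared guard: at most N CPU-heavy module invocations at once.
    cpu_sem: Arc<Semaphore>,
}

fn make_state() -> AppState {
    AppState { cpu_sem: Arc::new(Semaphore::new(2)) }
}

async fn heavy_work(state: AppState) -> anyhow::Result<String> {
    // Waits until a permit is free; the permit is released when `_permit` drops.
    let _permit = state.cpu_sem.acquire().await?;
    // ... invoke the heavy module operation here ...
    Ok("done".to_string())
}
```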
- HTTP backpressure and rate limiting (if using API) (see the sketch below)
  - Add tower::limit::ConcurrencyLimitLayer to cap in-flight requests.
  - Add tower::limit::RateLimitLayer or request-size/timeouts as needed.
  - Optionally add tower::timeout::TimeoutLayer to bound handler latency.
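Applied to an Axum router, those limits might look like this sketch. The numbers are placeholders; `RateLimitLayer` is omitted because the rate-limited service is not `Clone` and would need `tower::buffer` in front of it, and the timeout's error must be converted to a response because Axum services are infallible:

```rust
use std::time::Duration;
use axum::{error_handling::HandleErrorLayer, http::StatusCode, routing::get, BoxError, Router};
use tower::{limit::ConcurrencyLimitLayer, timeout::TimeoutLayer, ServiceBuilder};

fn app() -> Router {
    Router::new()
        .route("/health", get(|| async { "ok" }))
        .layer(
            ServiceBuilder::new()
                // Outermost: turn timeout (and other) errors into HTTP responses.
                .layer(HandleErrorLayer::new(|_: BoxError| async {
                    StatusCode::REQUEST_TIMEOUT
                }))
                // Bound handler latency.
                .layer(TimeoutLayer::new(Duration::from_secs(10)))
                // Cap in-flight requests across the router.
                .layer(ConcurrencyLimitLayer::new(64)),
        )
}
```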
- Stronger isolation (optional, later)
  - Evaluate running certain modules as separate processes for strict CPU caps.
  - Use cgroups v2 (Linux) or Job Objects (Windows) to bound CPU/memory per process.
  - Reuse the same JSON interface over IPC (e.g., stdio or a local socket).

## Build and run
- Build all crates
  - Run: cargo build --workspace

- Build each plugin as cdylib
  - Example: cd crates/modules/summarizer && cargo build --release

- Stage plugin libraries for the host to find
  - Create a modules directory the daemon will read, e.g. target/modules
  - Copy the built artifact into that directory:
    - Linux: copy target/release/libsummarizer.so -> target/modules/libsummarizer.so
    - macOS: copy target/release/libsummarizer.dylib -> target/modules/libsummarizer.dylib
    - Windows: copy target/release/summarizer.dll -> target/modules/summarizer.dll

- Alternatively set OWLY_MODULES_DIR to your chosen directory.

- Run the daemon
  - cargo run -p owly-news
  - Optionally set:
    - OWLY_MODULES_DIR=/absolute/path/to/modules
    - TOKIO_WORKER_THREADS=N

## Wire into the API
- Share ModuleHost in app state
  - Create a struct AppState { host: Arc<ModuleHost>, cpu_sem: Arc<Semaphore>, ... }.
  - Add AppState to Axum with .with_state(state).

- In a handler (example: POST /summarize; see the sketch after this list)
  - Parse payload as JSON.
  - Acquire a permit from cpu_sem before heavy work.
  - host.get("summarizer").await? to lazily load the module.
  - Call module.invoke_json("summarize", payload_value)?.
  - Map success to 200 with JSON; map errors to appropriate status codes.
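Putting those steps together, a `POST /summarize` handler might look like this sketch. The `AppState` fields follow the suggestion above, and the error mapping is deliberately crude:

```rust
use std::sync::Arc;
use axum::{extract::State, http::StatusCode, Json};
use serde_json::Value;
use tokio::sync::Semaphore;

#[derive(Clone)]
struct AppState {
    host: Arc<owly_news_module_host::ModuleHost>,
    cpu_sem: Arc<Semaphore>,
}

async fn summarize(
    State(state): State<AppState>,
    Json(payload): Json<Value>,
) -> Result<Json<Value>, (StatusCode, String)> {
    // Guard CPU-heavy work behind the shared semaphore.
    let _permit = state
        .cpu_sem
        .acquire()
        .await
        .map_err(|e| (StatusCode::SERVICE_UNAVAILABLE, e.to_string()))?;

    // Lazily load the module on first use, then invoke it with the JSON payload.
    let module = state
        .host
        .get("summarizer")
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
    let out = module
        .invoke_json("summarize", payload)
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    Ok(Json(out))
}
```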
- Error handling and observability
  - Use thiserror/anyhow to classify operational vs. client errors.
  - Add tracing spans around module loading and invocation; include module name and op.
  - Return structured error JSON when module reports an error.

- Configuration
  - Decide env vars and defaults: OWLY_MODULES_DIR, TOKIO_WORKER_THREADS, concurrency permits, rate limits.
  - Optionally add a config file (toml) and load via figment or config crate.

- Health and lifecycle
  - Add a /health route that checks:
    - Tokio is responsive.
    - Optional: preflight-check that required modules are present (or skip to keep lazy).

- Graceful shutdown: listen for SIGINT/SIGTERM and drain in-flight requests before exit (see the sketch below).
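A sketch of the health route plus graceful shutdown using Axum's `with_graceful_shutdown`. Unix signals only (the Windows branch is omitted), and it assumes tokio's `signal` feature; the bind address is a placeholder:

```rust
use axum::{routing::get, Router};
use tokio::net::TcpListener;
use tokio::signal;

async fn shutdown_signal() {
    // Resolve on either Ctrl+C (SIGINT) or SIGTERM so the server can drain.
    let ctrl_c = async { signal::ctrl_c().await.expect("install Ctrl+C handler") };
    let terminate = async {
        signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("install SIGTERM handler")
            .recv()
            .await;
    };
    tokio::select! {
        _ = ctrl_c => {},
        _ = terminate => {},
    }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let app = Router::new().route("/health", get(|| async { "ok" }));
    let listener = TcpListener::bind("127.0.0.1:3000").await?;
    // In-flight requests are drained before this future resolves.
    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await?;
    Ok(())
}
```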
backend-rust/crates/api/Cargo.lock (generated, new file, 7 lines)

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "api"
version = "0.1.0"

backend-rust/crates/api/Cargo.toml (new file, 28 lines)

@@ -0,0 +1,28 @@
[package]
name = "owly-news-api"
version.workspace = true
edition.workspace = true

[lib]
path = "src/lib.rs"

[dependencies]
anyhow = { workspace = true }
tokio = { workspace = true, features = ["full"] }
axum = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
sqlx = { workspace = true, features = ["runtime-tokio", "tls-native-tls", "sqlite", "macros", "migrate", "chrono", "json"] }
dotenv = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["env-filter", "json"] }
toml = { workspace = true }
unicode-segmentation = { workspace = true }
sha2 = { workspace = true }
hex = { workspace = true }
readability = { workspace = true }
scraper = { workspace = true }

[dev-dependencies]
tokio-test = { workspace = true }
axum-test = { workspace = true }

backend-rust/crates/api/src/api.rs (new file, 3 lines)

@@ -0,0 +1,3 @@
pub mod handlers;
pub mod middleware;
pub mod routes;

backend-rust/crates/api/src/api/handlers.rs (new file, 41 lines)

@@ -0,0 +1,41 @@
use axum::Json;
use axum::extract::State;
use serde_json::Value;
use sqlx::SqlitePool;

pub async fn get_articles(State(pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Article logic
    Ok(Json(serde_json::json!({"articles": []})))
}

pub async fn get_summaries(State(pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Summaries logic
    Ok(Json(serde_json::json!({"summaries": []})))
}

use axum::{
    http::StatusCode,
    response::{IntoResponse, Response},
};

pub struct AppError(anyhow::Error);

impl IntoResponse for AppError {
    fn into_response(self) -> Response {
        let (status, message) = match self.0.downcast_ref::<sqlx::Error>() {
            Some(_) => (StatusCode::INTERNAL_SERVER_ERROR, "Database error occurred"),
            None => (StatusCode::INTERNAL_SERVER_ERROR, "An error occurred"),
        };

        tracing::error!("API Error: {:?}", self.0);
        (status, message).into_response()
    }
}

impl<E> From<E> for AppError
where
    E: Into<anyhow::Error>, {
    fn from(err: E) -> Self {
        Self(err.into())
    }
}

backend-rust/crates/api/src/api/middleware.rs (new file, empty)

backend-rust/crates/api/src/api/routes.rs (new file, 11 lines)

@@ -0,0 +1,11 @@
use axum::Router;
use axum::routing::get;
use sqlx::SqlitePool;
use crate::api::handlers;

pub fn routes() -> Router<SqlitePool> {
    Router::new()
        .route("/articles", get(handlers::get_articles))
        .route("/summaries", get(handlers::get_summaries))
        // Add more routes as needed
}

backend-rust/crates/api/src/lib.rs (new file, 2 lines)

@@ -0,0 +1,2 @@
pub mod api;
pub use api::*;

backend-rust/crates/app/Cargo.toml (new file, 14 lines)

@@ -0,0 +1,14 @@
[package]
name = "owly-news"
version.workspace = true
edition.workspace = true

[dependencies]
owly-news-api = { path = "../api" }
owly-news-module-host = { path = "../module-host" }
tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] }
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["env-filter", "json"] }
anyhow = { workspace = true }
serde_json = { workspace = true }
num_cpus = { workspace = true }
backend-rust/crates/app/src/main.rs (new file, 45 lines)

@@ -0,0 +1,45 @@
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};

#[tokio::main(flavor = "multi_thread")]
async fn main() -> anyhow::Result<()> {
    // Tracing setup
    tracing_subscriber::registry()
        .with(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| "info".into()),
        )
        .with(tracing_subscriber::fmt::layer())
        .init();

    // Limit worker threads for CPU control (can be tuned via env)
    // Note: When using #[tokio::main], configure via env TOKIO_WORKER_THREADS.
    // Alternatively, build a Runtime manually for stricter control.
    if let Ok(threads) = std::env::var("TOKIO_WORKER_THREADS") {
        tracing::warn!(
            "TOKIO_WORKER_THREADS is set to {threads}, ensure it matches deployment requirements"
        );
    } else {
        // Provide a sane default via env if not set
        let default_threads = std::cmp::max(1, num_cpus::get_physical() / 2);
        unsafe { std::env::set_var("TOKIO_WORKER_THREADS", default_threads.to_string()); }
        tracing::info!("Defaulting worker threads to {}", default_threads);
    }

    // Example: lazily load and invoke the "summarizer" module when needed
    let host = owly_news_module_host::ModuleHost::default();

    // Simulate an on-demand call (e.g., from an HTTP handler)
    let summarizer = host.get("summarizer").await?;
    let resp = summarizer.invoke_json(
        "summarize",
        serde_json::json!({
            "text": "Rust enables fearless concurrency with strong guarantees over memory safety.",
            "ratio": 0.3
        }),
    )?;
    tracing::info!(?resp, "summarizer response");

    // TODO: wire this into your API routes/handlers, using the host.get("<module>").await when needed.
    tracing::info!("owly-news daemon running");
    Ok(())
}
backend-rust/crates/module-api/Cargo.toml (new file, 12 lines)

@@ -0,0 +1,12 @@
[package]
name = "owly-news-module-api"
version.workspace = true
edition.workspace = true

[lib]
path = "src/lib.rs"

[dependencies]
anyhow = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }

backend-rust/crates/module-api/src/lib.rs (new file, 30 lines)

@@ -0,0 +1,30 @@
use std::ffi::{CStr, CString};
use std::os::raw::c_char;

// Symbols every module must export with `extern "C"` and `#[no_mangle]`.
// Signature: fn module_name() -> *const c_char
// Signature: fn module_invoke(op: *const c_char, payload: *const c_char) -> *mut c_char
pub const SYMBOL_NAME: &str = "module_name";
pub const SYMBOL_INVOKE: &str = "module_invoke";

// Helper to convert C char* to &str
pub unsafe fn cstr_to_str<'a>(ptr: *const c_char) -> anyhow::Result<&'a str> {
    if ptr.is_null() {
        anyhow::bail!("null pointer");
    }
    Ok(CStr::from_ptr(ptr).to_str()?)
}

// Helper to allocate a CString for return across FFI boundary (module side)
pub fn string_to_cstring_ptr(s: String) -> *mut c_char {
    CString::new(s).unwrap().into_raw()
}

// Helper to take back ownership of a CString (host side), then free by letting CString drop
pub unsafe fn take_cstring(ptr: *mut c_char) -> anyhow::Result<String> {
    if ptr.is_null() {
        anyhow::bail!("null pointer");
    }
    let s = CString::from_raw(ptr);
    Ok(s.into_string()?)
}

backend-rust/crates/module-host/Cargo.toml (new file, 17 lines)

@@ -0,0 +1,17 @@
[package]
name = "owly-news-module-host"
version.workspace = true
edition.workspace = true

[lib]
path = "src/lib.rs"

[dependencies]
anyhow = { workspace = true }
libloading = { workspace = true }
once_cell = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] }
tracing = { workspace = true }
owly-news-module-api = { path = "../module-api" }
114
backend-rust/crates/module-host/src/lib.rs
Normal file
114
backend-rust/crates/module-host/src/lib.rs
Normal file
@@ -0,0 +1,114 @@
use anyhow::Context;
use libloading::{Library, Symbol};
use once_cell::sync::OnceCell;
use owly_news_module_api::{take_cstring, SYMBOL_INVOKE, SYMBOL_NAME};
use std::collections::HashMap;
use std::ffi::CString;
use std::os::raw::c_char;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tokio::sync::Mutex;
use tracing::info;

type ModuleNameFn = unsafe extern "C" fn() -> *const c_char;
type ModuleInvokeFn = unsafe extern "C" fn(*const c_char, *const c_char) -> *mut c_char;

pub struct ModuleHandle {
    _lib: Arc<Library>,
    invoke: ModuleInvokeFn,
}

impl ModuleHandle {
    pub fn invoke_json(&self, op: &str, payload: serde_json::Value) -> anyhow::Result<serde_json::Value> {
        let op_c = CString::new(op)?;
        let payload_c = CString::new(serde_json::to_string(&payload)?)?;

        let out_ptr = unsafe { (self.invoke)(op_c.as_ptr(), payload_c.as_ptr()) };
        let out = unsafe { take_cstring(out_ptr) }?;
        let val = serde_json::from_str(&out).context("module returned invalid JSON")?;
        Ok(val)
    }
}

pub struct ModuleHost {
    // Lazy cache of loaded modules by logical name
    loaded: Mutex<HashMap<String, Arc<ModuleHandle>>>,
    modules_dir: PathBuf,
}

static DEFAULT_HOST: OnceCell<Arc<ModuleHost>> = OnceCell::new();

impl ModuleHost {
    pub fn default() -> Arc<Self> {
        DEFAULT_HOST
            .get_or_init(|| {
                Arc::new(Self::new(
                    std::env::var_os("OWLY_MODULES_DIR")
                        .map(PathBuf::from)
                        .unwrap_or_else(|| PathBuf::from("target/modules")), // default location
                ))
            })
            .clone()
    }

    pub fn new(modules_dir: PathBuf) -> Self {
        Self {
            loaded: Mutex::new(HashMap::new()),
            modules_dir,
        }
    }

    pub async fn get(&self, name: &str) -> anyhow::Result<Arc<ModuleHandle>> {
        if let Some(h) = self.loaded.lock().await.get(name).cloned() {
            return Ok(h);
        }
        let handle = Arc::new(self.load_module(name)?);
        self.loaded.lock().await.insert(name.to_string(), handle.clone());
        Ok(handle)
    }

    fn load_module(&self, name: &str) -> anyhow::Result<ModuleHandle> {
        let lib_path = resolve_module_path(&self.modules_dir, name)?;
        info!(module = name, path = %lib_path.display(), "loading module");

        // SAFETY: we keep Library alive in ModuleHandle to ensure symbols remain valid
        let lib = unsafe { Library::new(lib_path) }.with_context(|| "failed to load module library")?;

        // Validate and bind symbols
        let name_fn: Symbol<ModuleNameFn> = unsafe { lib.get(SYMBOL_NAME.as_bytes()) }
            .with_context(|| "missing symbol `module_name`")?;
        let invoke_fn: Symbol<ModuleInvokeFn> = unsafe { lib.get(SYMBOL_INVOKE.as_bytes()) }
            .with_context(|| "missing symbol `module_invoke`")?;

        // Optional: verify reported name matches requested
        let c_name_ptr = unsafe { name_fn() };
        let c_name = unsafe { std::ffi::CStr::from_ptr(c_name_ptr) }.to_string_lossy().into_owned();
        if c_name != name {
            anyhow::bail!("module reported name `{c_name}`, expected `{name}`");
        }

        // Copy the function pointer before moving the library
        let invoke_fn_copy = *invoke_fn;

        Ok(ModuleHandle {
            _lib: Arc::new(lib),
            invoke: invoke_fn_copy,
        })
    }
}

fn resolve_module_path(dir: &Path, name: &str) -> anyhow::Result<PathBuf> {
    #[cfg(target_os = "windows")]
    const EXT: &str = "dll";
    #[cfg(target_os = "macos")]
    const EXT: &str = "dylib";
    #[cfg(all(unix, not(target_os = "macos")))]
    const EXT: &str = "so";

    // Windows cdylibs are emitted without the `lib` prefix; Unix-likes use it.
    #[cfg(target_os = "windows")]
    let fname = format!("{name}.{EXT}");
    #[cfg(not(target_os = "windows"))]
    let fname = format!("lib{name}.{EXT}");

    let path = dir.join(fname);
    if !path.exists() {
        anyhow::bail!("module `{name}` not found at {}", path.display());
    }
    Ok(path)
}
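To make the host's control flow concrete, here is a minimal sketch of a caller, assuming the host crate is importable as `owly_news_module_host` and a `summarizer` cdylib already sits in `target/modules` (crate name, payload values, and the `main` wrapper are illustrative, not confirmed by this diff):

```rust
use serde_json::json;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Resolves OWLY_MODULES_DIR, falling back to target/modules.
    let host = owly_news_module_host::ModuleHost::default(); // crate name assumed

    // First call dlopens libsummarizer.{so,dylib} (summarizer.dll on Windows);
    // later calls are served from the in-memory cache.
    let module = host.get("summarizer").await?;

    // Payload shape matches SummarizeReq in the summarizer module below.
    let reply = module.invoke_json(
        "summarize",
        json!({ "text": "Owly News fetches feeds and summarizes them locally.", "ratio": 0.5 }),
    )?;

    println!("{reply}"); // e.g. {"summary":"Owly News fetches feeds..."}
    Ok(())
}
```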
14
backend-rust/crates/modules/summarizer/Cargo.toml
Normal file
14
backend-rust/crates/modules/summarizer/Cargo.toml
Normal file
@@ -0,0 +1,14 @@
[package]
name = "owly-news-module-summarizer"
version.workspace = true
edition.workspace = true

[lib]
crate-type = ["cdylib"]
path = "src/lib.rs"

[dependencies]
anyhow = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
owly-news-module-api = { path = "../../module-api" }
50
backend-rust/crates/modules/summarizer/src/lib.rs
Normal file
50
backend-rust/crates/modules/summarizer/src/lib.rs
Normal file
@@ -0,0 +1,50 @@
use owly_news_module_api::{cstr_to_str, string_to_cstring_ptr};
use serde::{Deserialize, Serialize};
use std::os::raw::c_char;

#[derive(Deserialize)]
struct SummarizeReq {
    text: String,
    #[serde(default = "default_ratio")]
    ratio: f32,
}

fn default_ratio() -> f32 { 0.2 }

#[derive(Serialize)]
struct SummarizeResp {
    summary: String,
}

#[unsafe(no_mangle)]
pub extern "C" fn module_name() -> *const c_char {
    // IMPORTANT: string must live forever; use a const C string
    static NAME: &str = "summarizer\0";
    NAME.as_ptr() as *const c_char
}

#[unsafe(no_mangle)]
pub extern "C" fn module_invoke(op: *const c_char, payload: *const c_char) -> *mut c_char {
    // SAFETY: called by trusted host with valid pointers
    let res = (|| -> anyhow::Result<String> {
        let op = unsafe { cstr_to_str(op)? };
        let payload = unsafe { cstr_to_str(payload)? };

        match op {
            "summarize" => {
                let req: SummarizeReq = serde_json::from_str(payload)?;
                // Placeholder summarization logic. Replace with real algorithm.
                let words: Vec<&str> = req.text.split_whitespace().collect();
                let take = ((words.len() as f32) * req.ratio).max(1.0).round() as usize;
                let summary = words.into_iter().take(take).collect::<Vec<_>>().join(" ");
                let resp = SummarizeResp { summary };
                Ok(serde_json::to_string(&resp)?)
            }
            _ => anyhow::bail!("unknown op: {op}"),
        }
    })();

    let json = res.unwrap_or_else(|e| serde_json::json!({ "error": e.to_string() }).to_string());

    string_to_cstring_ptr(json)
}
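Because `module_invoke` is a plain `extern "C"` function, the placeholder logic can be exercised in-process without going through `dlopen`; a small test sketch (it assumes `take_cstring` reclaims the returned allocation, as the host code above does):

```rust
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::CString;

    #[test]
    fn summarize_truncates_by_ratio() {
        let op = CString::new("summarize").unwrap();
        let payload = CString::new(r#"{"text":"one two three four","ratio":0.5}"#).unwrap();

        // Call the exported entry point directly.
        let out = module_invoke(op.as_ptr(), payload.as_ptr());

        // Reclaim the C string the module allocated; signature assumed from host usage.
        let json = unsafe { owly_news_module_api::take_cstring(out) }.expect("valid UTF-8 JSON");

        // 4 words * ratio 0.5 => the first 2 words survive.
        assert_eq!(json, r#"{"summary":"one two"}"#);
    }
}
```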
31
backend-rust/example.env
Normal file
31
backend-rust/example.env
Normal file
@@ -0,0 +1,31 @@
# URL for the Ollama service
OLLAMA_HOST=http://localhost:11434

# Interval for scheduled news fetching in hours
CRON_HOURS=1

# Minimum interval for scheduled news fetching in hours
MIN_CRON_HOURS=0.5

# Cooldown period in minutes between manual syncs
SYNC_COOLDOWN_MINUTES=30

# LLM model to use for summarization
LLM_MODEL=qwen2:7b-instruct-q4_K_M
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
LLM_MODEL=mistral-nemo:12b

# Timeout in seconds for LLM requests
LLM_TIMEOUT_SECONDS=180

# Timeout in seconds for Ollama API requests
OLLAMA_API_TIMEOUT_SECONDS=10

# Timeout in seconds for article fetching
ARTICLE_FETCH_TIMEOUT=30

# Maximum length of article content to process
MAX_ARTICLE_LENGTH=5000

# SQLite database connection string
DB_NAME=owlynews.sqlite3
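The Rust side reads these values at startup; a sketch of the lookup-with-default pattern, hedged since the real parsing lives in the suppressed `config.rs` (the helper name and its call sites are illustrative):

```rust
use std::env;

/// Read an env var, falling back to a default; a stand-in for the real config.rs logic.
fn env_or(key: &str, default: &str) -> String {
    env::var(key).unwrap_or_else(|_| default.to_string())
}

fn main() {
    let ollama_host = env_or("OLLAMA_HOST", "http://localhost:11434");
    let cron_hours: f64 = env_or("CRON_HOURS", "1").parse().expect("CRON_HOURS must be numeric");
    let min_cron_hours: f64 = env_or("MIN_CRON_HOURS", "0.5").parse().expect("numeric");

    // Mirror the Python backend: never schedule below the minimum interval.
    let effective = cron_hours.max(min_cron_hours);
    println!("{ollama_host}, fetching every {effective}h");
}
```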
5
backend-rust/migrations/001_initial_schema.down.sql
Normal file
5
backend-rust/migrations/001_initial_schema.down.sql
Normal file
@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS meta;
DROP TABLE IF EXISTS settings;
DROP TABLE IF EXISTS feeds;
DROP INDEX IF EXISTS idx_news_published;
DROP TABLE IF EXISTS news;
38
backend-rust/migrations/001_initial_schema.up.sql
Normal file
38
backend-rust/migrations/001_initial_schema.up.sql
Normal file
@@ -0,0 +1,38 @@
-- Initial database schema for Owly News Summariser

-- News table to store articles
CREATE TABLE IF NOT EXISTS news
(
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    title      TEXT NOT NULL,
    summary    TEXT,
    url        TEXT NOT NULL,
    published  TIMESTAMP NOT NULL,
    country    TEXT NOT NULL,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);

-- Index for faster queries on published date
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);

-- Feeds table to store RSS feed sources
CREATE TABLE IF NOT EXISTS feeds
(
    id      INTEGER PRIMARY KEY,
    country TEXT,
    url     TEXT UNIQUE NOT NULL
);

-- Settings table for application configuration
CREATE TABLE IF NOT EXISTS settings
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);

-- Meta table for application metadata
CREATE TABLE IF NOT EXISTS meta
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);
18
backend-rust/migrations/002_add_category_to_news.down.sql
Normal file
18
backend-rust/migrations/002_add_category_to_news.down.sql
Normal file
@@ -0,0 +1,18 @@
CREATE TABLE news_backup
(
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    title      TEXT NOT NULL,
    summary    TEXT,
    url        TEXT NOT NULL,
    published  TIMESTAMP NOT NULL,
    country    TEXT NOT NULL,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);

INSERT INTO news_backup
SELECT id, title, summary, url, published, country, created_at
FROM news;
DROP TABLE news;
ALTER TABLE news_backup
    RENAME TO news;
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);
3
backend-rust/migrations/002_add_category_to_news.up.sql
Normal file
3
backend-rust/migrations/002_add_category_to_news.up.sql
Normal file
@@ -0,0 +1,3 @@
-- Add category field to news table
ALTER TABLE news
    ADD COLUMN category TEXT;
7
backend-rust/migrations/003_create_articles_table.down.sql
Normal file
@@ -0,0 +1,7 @@
-- Drop articles table and its indexes
DROP INDEX IF EXISTS idx_articles_read_at;
DROP INDEX IF EXISTS idx_articles_source_type;
DROP INDEX IF EXISTS idx_articles_processing_status;
DROP INDEX IF EXISTS idx_articles_added_at;
DROP INDEX IF EXISTS idx_articles_published_at;
DROP TABLE IF EXISTS articles;
27
backend-rust/migrations/003_create_articles_table.up.sql
Normal file
27
backend-rust/migrations/003_create_articles_table.up.sql
Normal file
@@ -0,0 +1,27 @@
-- Create enhanced articles table to replace news table structure
CREATE TABLE IF NOT EXISTS articles
(
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    title             TEXT NOT NULL,
    url               TEXT NOT NULL,
    source_type       TEXT NOT NULL DEFAULT 'rss',     -- 'rss', 'manual'
    rss_content       TEXT,                            -- RSS description/excerpt
    full_content      TEXT,                            -- Scraped full content
    summary           TEXT,                            -- AI-generated summary
    processing_status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'processing', 'completed', 'failed'
    published_at      TIMESTAMP NOT NULL,
    added_at          TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    read_at           TIMESTAMP,
    read_count        INTEGER NOT NULL DEFAULT 0,
    reading_time      INTEGER,                         -- in seconds
    ai_enabled        BOOLEAN NOT NULL DEFAULT 1,
    created_at        TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    updated_at        TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create indexes for performance
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles (published_at);
CREATE INDEX IF NOT EXISTS idx_articles_added_at ON articles (added_at);
CREATE INDEX IF NOT EXISTS idx_articles_processing_status ON articles (processing_status);
CREATE INDEX IF NOT EXISTS idx_articles_source_type ON articles (source_type);
CREATE INDEX IF NOT EXISTS idx_articles_read_at ON articles (read_at);
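A sketch of how a service might enqueue work against this table, assuming sqlx as used elsewhere on this branch (the helper itself is illustrative, only the column names come from the migration above):

```rust
use sqlx::SqlitePool;

/// Sketch: queue a manually added article for the processing pipeline.
async fn add_manual_article(pool: &SqlitePool, title: &str, url: &str) -> anyhow::Result<i64> {
    let id = sqlx::query(
        "INSERT INTO articles (title, url, source_type, processing_status, published_at)
         VALUES (?, ?, 'manual', 'pending', datetime('now'))",
    )
    .bind(title)
    .bind(url)
    .execute(pool)
    .await?
    .last_insert_rowid(); // SQLite rowid of the new article

    Ok(id)
}
```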
9
backend-rust/migrations/004_create_tags_table.down.sql
Normal file
9
backend-rust/migrations/004_create_tags_table.down.sql
Normal file
@@ -0,0 +1,9 @@
-- Drop tag system tables and indexes
DROP INDEX IF EXISTS idx_article_tags_ai_generated;
DROP INDEX IF EXISTS idx_article_tags_tag_id;
DROP INDEX IF EXISTS idx_article_tags_article_id;
DROP INDEX IF EXISTS idx_tags_usage_count;
DROP INDEX IF EXISTS idx_tags_parent_id;
DROP INDEX IF EXISTS idx_tags_category;
DROP TABLE IF EXISTS article_tags;
DROP TABLE IF EXISTS tags;
31
backend-rust/migrations/004_create_tags_table.up.sql
Normal file
31
backend-rust/migrations/004_create_tags_table.up.sql
Normal file
@@ -0,0 +1,31 @@
-- Create tags table with hierarchical support
CREATE TABLE IF NOT EXISTS tags
(
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    name        TEXT NOT NULL UNIQUE,
    category    TEXT NOT NULL,                -- 'geographic', 'content', 'source', 'custom'
    description TEXT,
    color       TEXT,                         -- Hex color for UI display
    usage_count INTEGER NOT NULL DEFAULT 0,
    parent_id   INTEGER REFERENCES tags (id), -- For hierarchical tags (e.g., Country -> Region -> City)
    created_at  TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create article_tags junction table
CREATE TABLE IF NOT EXISTS article_tags
(
    article_id       INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
    tag_id           INTEGER NOT NULL REFERENCES tags (id) ON DELETE CASCADE,
    confidence_score REAL DEFAULT 1.0,        -- AI confidence (0.0-1.0)
    ai_generated     BOOLEAN NOT NULL DEFAULT 0,
    created_at       TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (article_id, tag_id)
);

-- Create indexes
CREATE INDEX IF NOT EXISTS idx_tags_category ON tags (category);
CREATE INDEX IF NOT EXISTS idx_tags_parent_id ON tags (parent_id);
CREATE INDEX IF NOT EXISTS idx_tags_usage_count ON tags (usage_count DESC);
CREATE INDEX IF NOT EXISTS idx_article_tags_article_id ON article_tags (article_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_tag_id ON article_tags (tag_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_ai_generated ON article_tags (ai_generated);
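A sketch of the filtering query this junction table enables, for example honoring the `min_confidence_threshold` setting (the helper is illustrative; only the schema above is from the diff):

```rust
use sqlx::{Row, SqlitePool};

/// Sketch: fetch article ids carrying a given tag above a confidence floor.
async fn article_ids_for_tag(pool: &SqlitePool, tag: &str, min_conf: f64) -> anyhow::Result<Vec<i64>> {
    let rows = sqlx::query(
        "SELECT at.article_id
         FROM article_tags at
         JOIN tags t ON t.id = at.tag_id
         WHERE t.name = ? AND at.confidence_score >= ?
         ORDER BY at.confidence_score DESC",
    )
    .bind(tag)
    .bind(min_conf)
    .fetch_all(pool)
    .await?;

    Ok(rows.iter().map(|r| r.get::<i64, _>("article_id")).collect())
}
```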
11
backend-rust/migrations/005_create_statistics_table.down.sql
Normal file
11
backend-rust/migrations/005_create_statistics_table.down.sql
Normal file
@@ -0,0 +1,11 @@
-- Drop analytics system tables and indexes
DROP INDEX IF EXISTS idx_legacy_migration_old_filter_type;
DROP INDEX IF EXISTS idx_share_templates_format;
DROP INDEX IF EXISTS idx_filter_presets_user_id;
DROP INDEX IF EXISTS idx_reading_stats_read_at;
DROP INDEX IF EXISTS idx_reading_stats_article_id;
DROP INDEX IF EXISTS idx_reading_stats_user_id;
DROP TABLE IF EXISTS legacy_migration;
DROP TABLE IF EXISTS share_templates;
DROP TABLE IF EXISTS filter_presets;
DROP TABLE IF EXISTS reading_stats;
50
backend-rust/migrations/005_create_statistics_table.up.sql
Normal file
50
backend-rust/migrations/005_create_statistics_table.up.sql
Normal file
@@ -0,0 +1,50 @@
-- Create reading statistics table
CREATE TABLE IF NOT EXISTS reading_stats
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id         INTEGER DEFAULT 1, -- For future multi-user support
    article_id      INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
    read_at         TIMESTAMP NOT NULL,
    reading_time    INTEGER,           -- in seconds
    completion_rate REAL DEFAULT 1.0,  -- 0.0-1.0, how much of the article was read
    created_at      TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create filter presets table
CREATE TABLE IF NOT EXISTS filter_presets
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    name            TEXT NOT NULL,
    filter_criteria TEXT NOT NULL,     -- JSON string of filter parameters
    user_id         INTEGER DEFAULT 1, -- For future multi-user support
    created_at      TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create share templates table
CREATE TABLE IF NOT EXISTS share_templates
(
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    name             TEXT NOT NULL,
    format           TEXT NOT NULL, -- 'text', 'markdown', 'html', 'json'
    template_content TEXT NOT NULL,
    is_default       BOOLEAN NOT NULL DEFAULT 0,
    created_at       TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create legacy migration tracking table
CREATE TABLE IF NOT EXISTS legacy_migration
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    old_filter_type TEXT NOT NULL, -- 'country', 'category', etc.
    old_value       TEXT NOT NULL,
    new_tag_ids     TEXT,          -- JSON array of tag IDs
    migrated_at     TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create indexes
CREATE INDEX IF NOT EXISTS idx_reading_stats_user_id ON reading_stats (user_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_article_id ON reading_stats (article_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_read_at ON reading_stats (read_at);
CREATE INDEX IF NOT EXISTS idx_filter_presets_user_id ON filter_presets (user_id);
CREATE INDEX IF NOT EXISTS idx_share_templates_format ON share_templates (format);
CREATE INDEX IF NOT EXISTS idx_legacy_migration_old_filter_type ON legacy_migration (old_filter_type);
18
backend-rust/migrations/006_update_settings_table.down.sql
Normal file
18
backend-rust/migrations/006_update_settings_table.down.sql
Normal file
@@ -0,0 +1,18 @@
-- Remove enhanced settings columns and indexes
DROP INDEX IF EXISTS idx_settings_user_id;
DROP INDEX IF EXISTS idx_settings_category;

-- Note: SQLite doesn't support DROP COLUMN, so we recreate the table
CREATE TABLE settings_backup AS
SELECT key, val
FROM settings;
DROP TABLE settings;
CREATE TABLE settings
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);
INSERT INTO settings
SELECT key, val
FROM settings_backup;
DROP TABLE settings_backup;
74
backend-rust/migrations/006_update_settings_table.up.sql
Normal file
74
backend-rust/migrations/006_update_settings_table.up.sql
Normal file
@@ -0,0 +1,74 @@
-- Enhance settings table to support more structured configuration
ALTER TABLE settings
    ADD COLUMN category TEXT DEFAULT 'general';
ALTER TABLE settings
    ADD COLUMN user_id INTEGER DEFAULT 1;
ALTER TABLE settings
    ADD COLUMN updated_at TIMESTAMP DEFAULT (datetime('now'));

-- Create index for better performance
CREATE INDEX IF NOT EXISTS idx_settings_category ON settings (category);
CREATE INDEX IF NOT EXISTS idx_settings_user_id ON settings (user_id);

-- Insert default settings based on roadmap configuration
INSERT OR IGNORE INTO settings (key, val, category)
VALUES
    -- Display settings
    ('default_view', 'compact', 'display'),
    ('articles_per_page', '50', 'display'),
    ('show_reading_time', '1', 'display'),
    ('show_word_count', '0', 'display'),
    ('highlight_unread', '1', 'display'),
    ('theme', 'auto', 'display'),

    -- Analytics settings
    ('analytics_enabled', '1', 'analytics'),
    ('track_reading_time', '1', 'analytics'),
    ('track_scroll_position', '1', 'analytics'),
    ('retention_days', '365', 'analytics'),
    ('aggregate_older_data', '1', 'analytics'),

    -- Filtering settings
    ('enable_smart_suggestions', '1', 'filtering'),
    ('max_recent_filters', '10', 'filtering'),
    ('auto_save_filters', '1', 'filtering'),
    ('default_sort', 'added_desc', 'filtering'),
    ('enable_geographic_hierarchy', '1', 'filtering'),
    ('auto_migrate_country_filters', '1', 'filtering'),

    -- Sharing settings
    ('default_share_format', 'text', 'sharing'),
    ('include_summary', '1', 'sharing'),
    ('include_tags', '1', 'sharing'),
    ('include_source', '1', 'sharing'),
    ('copy_to_clipboard', '1', 'sharing'),

    -- AI settings
    ('ai_enabled', '1', 'ai'),
    ('ai_provider', 'ollama', 'ai'),
    ('ai_timeout_seconds', '120', 'ai'),
    ('ai_summary_enabled', '1', 'ai'),
    ('ai_summary_temperature', '0.1', 'ai'),
    ('ai_summary_max_tokens', '1000', 'ai'),
    ('ai_tagging_enabled', '1', 'ai'),
    ('ai_tagging_temperature', '0.3', 'ai'),
    ('ai_tagging_max_tokens', '200', 'ai'),
    ('max_tags_per_article', '10', 'ai'),
    ('min_confidence_threshold', '0.7', 'ai'),
    ('enable_geographic_tagging', '1', 'ai'),
    ('enable_category_tagging', '1', 'ai'),
    ('geographic_hierarchy_levels', '3', 'ai'),

    -- Scraping settings
    ('scraping_timeout_seconds', '30', 'scraping'),
    ('scraping_max_retries', '3', 'scraping'),
    ('max_content_length', '50000', 'scraping'),
    ('respect_robots_txt', '1', 'scraping'),
    ('rate_limit_delay_ms', '1000', 'scraping'),

    -- Processing settings
    ('batch_size', '10', 'processing'),
    ('max_concurrent', '5', 'processing'),
    ('retry_attempts', '3', 'processing'),
    ('priority_manual', '1', 'processing'),
    ('auto_mark_read_on_view', '0', 'processing');
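A sketch of reading one of these seeded values back with a typed default (the helper name is illustrative; only the `settings` schema and the '0'/'1' value convention come from the migration above):

```rust
use sqlx::SqlitePool;

/// Sketch: read a boolean setting, falling back to a default when unset.
async fn setting_bool(pool: &SqlitePool, key: &str, default: bool) -> anyhow::Result<bool> {
    let row: Option<(String,)> = sqlx::query_as("SELECT val FROM settings WHERE key = ?")
        .bind(key)
        .fetch_optional(pool)
        .await?;

    // Settings are stored as '0'/'1' strings, matching the seed data.
    Ok(row.map(|(v,)| v == "1").unwrap_or(default))
}
```

Called as, for example, `setting_bool(&pool, "ai_enabled", true).await?`.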
@@ -0,0 +1,39 @@
-- Remove migrated data (this will remove all articles and tags created from migration)
-- WARNING: This will delete all migrated data

-- Remove legacy migration records
DELETE
FROM legacy_migration
WHERE old_filter_type IN ('country', 'category');

-- Remove article-tag associations for migrated data (non-AI generated)
DELETE
FROM article_tags
WHERE ai_generated = 0;

-- Remove migrated geographic tags (only those created from country data)
DELETE
FROM tags
WHERE tags.category = 'geographic'
  AND EXISTS (SELECT 1 FROM news WHERE news.country = tags.name);

-- Remove migrated content tags (only those created from category data)
DELETE
FROM tags
WHERE tags.category = 'content'
  AND EXISTS (SELECT 1 FROM news WHERE news.category = tags.name);

-- Remove migrated articles (only those that match news entries)
DELETE
FROM articles
WHERE EXISTS (SELECT 1
              FROM news
              WHERE news.url = articles.url
                AND news.title = articles.title
                AND articles.source_type = 'rss');

-- Reset tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
                   FROM article_tags
                   WHERE tag_id = tags.id);
@@ -0,0 +1,84 @@
-- Migrate data from old news table to new articles table
INSERT INTO articles (title, url, summary, published_at, added_at, source_type, processing_status)
SELECT title,
       url,
       summary,
       published,
       datetime(created_at, 'unixepoch'),
       'rss',
       CASE
           WHEN summary IS NOT NULL AND summary != '' THEN 'completed'
           ELSE 'pending'
       END
FROM news;

-- Create geographic tags from existing country data
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT country,
                'geographic',
                'Geographic location: ' || country,
                COUNT(*)
FROM news
WHERE country IS NOT NULL
  AND country != ''
GROUP BY country;

-- Link articles to their geographic tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
       t.id,
       0,  -- Not AI generated, migrated from legacy data
       1.0 -- Full confidence for existing data
FROM articles a
         JOIN news n ON a.url = n.url AND a.title = n.title
         JOIN tags t ON t.name = n.country AND t.category = 'geographic'
WHERE n.country IS NOT NULL
  AND n.country != '';

-- Create category tags if category column exists in news table
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT n.category,
                'content',
                'Content category: ' || n.category,
                COUNT(*)
FROM news n
WHERE n.category IS NOT NULL
  AND n.category != ''
GROUP BY n.category;

-- Link articles to their category tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
       t.id,
       0,  -- Not AI generated, migrated from legacy data
       1.0 -- Full confidence for existing data
FROM articles a
         JOIN news n ON a.url = n.url AND a.title = n.title
         JOIN tags t ON t.name = n.category AND t.category = 'content'
WHERE n.category IS NOT NULL
  AND n.category != '';

-- Record migration in legacy_migration table for countries
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'country',
       n.country,
       '[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT country FROM news WHERE country IS NOT NULL AND country != '') n
         JOIN tags t ON t.name = n.country AND t.category = 'geographic'
GROUP BY n.country;

-- Record migration in legacy_migration table for categories (if they exist)
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'category',
       n.category,
       '[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT category FROM news WHERE category IS NOT NULL AND category != '') n
         JOIN tags t ON t.name = n.category AND t.category = 'content'
GROUP BY n.category;

-- Update tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
                   FROM article_tags
                   WHERE tag_id = tags.id);
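Because this migration matches articles back to `news` rows by `url` and `title`, a post-migration sanity check is cheap insurance; a hedged sketch (the helper is illustrative, only the table names come from the migrations above):

```rust
use sqlx::SqlitePool;

/// Sketch: compare legacy and migrated row counts right after the migration runs.
async fn verify_migration(pool: &SqlitePool) -> anyhow::Result<()> {
    let (news,): (i64,) = sqlx::query_as("SELECT COUNT(*) FROM news").fetch_one(pool).await?;
    let (articles,): (i64,) =
        sqlx::query_as("SELECT COUNT(*) FROM articles WHERE source_type = 'rss'")
            .fetch_one(pool)
            .await?;

    // Immediately after the migration the counts should match; later syncs only add rows.
    anyhow::ensure!(articles >= news, "only {articles} migrated articles for {news} news rows");
    Ok(())
}
```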
@@ -0,0 +1,4 @@
-- Remove default sharing templates
DELETE
FROM share_templates
WHERE name IN ('Default Text', 'Markdown', 'Simple Text', 'HTML Email');
@@ -0,0 +1,39 @@
-- Insert default sharing templates
INSERT INTO share_templates (name, format, template_content, is_default)
VALUES ('Default Text', 'text', '📰 {title}

{summary}

🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}

Shared via Owly News Summariser', 1),

       ('Markdown', 'markdown', '# {title}

{summary}

**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}

---
*Shared via Owly News Summariser*', 1),

       ('Simple Text', 'text', '{title}

{summary}

Source: {url}', 0),

       ('HTML Email', 'html', '<h2>{title}</h2>
<p>{summary}</p>
<p><strong>Tags:</strong> {tags}<br>
<strong>Location:</strong> {geographic_tags}<br>
<strong>Source:</strong> <a href="{url}">{url}</a><br>
<strong>Published:</strong> {published_at}</p>
<hr>
<small>Shared via Owly News Summariser</small>', 0);
1003
backend-rust/src/config.rs
Normal file
1003
backend-rust/src/config.rs
Normal file
File diff suppressed because it is too large
38
backend-rust/src/db.rs
Normal file
38
backend-rust/src/db.rs
Normal file
@@ -0,0 +1,38 @@
use crate::config::AppSettings;
use anyhow::{Context, Result};
use sqlx::migrate::Migrator;
use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
use sqlx::{Pool, Sqlite, SqlitePool};
use std::str::FromStr;
use std::time::Duration;
use tracing::info;

pub const MIGRATOR: Migrator = sqlx::migrate!("./migrations");

pub async fn initialize_db(app_settings: &AppSettings) -> Result<Pool<Sqlite>> {
    app_settings.ensure_default_directory()?;

    let options = SqliteConnectOptions::from_str(&app_settings.database_url())?
        .create_if_missing(true)
        .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
        .foreign_keys(true);

    let pool = SqlitePoolOptions::new()
        .max_connections(20)
        .min_connections(5)
        .acquire_timeout(Duration::from_secs(30))
        .idle_timeout(Duration::from_secs(600))
        .connect_with(options)
        .await?;

    MIGRATOR.run(&pool).await.with_context(|| "Database migrations failed")?;
    info!("Database migrations completed successfully");

    Ok(pool)
}

pub async fn create_pool(opts: SqliteConnectOptions) -> Result<SqlitePool> {
    let pool = SqlitePool::connect_with(opts).await?;

    Ok(pool)
}
104
backend-rust/src/main.rs
Normal file
104
backend-rust/src/main.rs
Normal file
@@ -0,0 +1,104 @@
mod api; // route handlers used by create_app below
mod config;
mod db;
mod models;
mod services;

use crate::config::{AppSettings, ConfigFile};
use anyhow::Result;
use axum::Router;
use axum::routing::get;
use tokio::signal;
use tracing::info;

#[tokio::main]
async fn main() -> Result<()> {
    init_logging();

    info!("Starting server");

    let app_settings = load_app_settings();

    let pool = db::initialize_db(&app_settings).await?;
    let app = create_app(pool);

    let listener = tokio::net::TcpListener::bind(format!(
        "{}:{}",
        &app_settings.config.server.host, &app_settings.config.server.port
    ))
    .await?;
    info!(
        "Server starting on http://{}:{}",
        &app_settings.config.server.host, &app_settings.config.server.port
    );

    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await?;
    info!("Server stopped");

    Ok(())
}

fn create_app(pool: sqlx::SqlitePool) -> Router {
    Router::new()
        .route("/health", get(health_check))
        .nest("/api", api::routes::routes())
        .with_state(pool)
}

async fn health_check() -> &'static str {
    "OK"
}

fn init_logging() {
    tracing_subscriber::fmt()
        .with_target(false)
        .compact()
        // .with_env_filter(EnvFilter::from_default_env())
        // .json() // For Production
        .init();
}

fn load_app_settings() -> AppSettings {
    let app_settings = AppSettings::get_app_settings();

    app_settings
        .ensure_default_directory()
        .expect("Failed to create default directory");

    let config = ConfigFile::load_from_file(&app_settings)
        .expect("Failed to load config file");

    AppSettings {
        config,
        ..app_settings
    }
}

async fn shutdown_signal() {
    let ctrl_c = async {
        signal::ctrl_c()
            .await
            .expect("failed to install CTRL+C handler");
    };

    #[cfg(unix)]
    let terminate = async {
        signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("failed to install terminate handler")
            .recv()
            .await;
    };

    #[cfg(not(unix))]
    let terminate = std::future::pending::<()>();

    tokio::select! {
        _ = ctrl_c => {},
        _ = terminate => {},
    }

    info!("Signal received, shutting down");
}
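`create_app` nests `api::routes::routes()`, but the `api` module itself is not part of this diff; purely as an assumption, the smallest shape that would satisfy it looks like this:

```rust
// src/api.rs (sketch; the real module is not shown in this diff)
pub mod routes {
    use axum::{routing::get, Router};
    use sqlx::SqlitePool;

    /// Minimal placeholder router matching the nesting in create_app.
    pub fn routes() -> Router<SqlitePool> {
        Router::new().route("/ping", get(|| async { "pong" }))
    }
}
```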
6
backend-rust/src/models.rs
Normal file
6
backend-rust/src/models.rs
Normal file
@@ -0,0 +1,6 @@
mod article;
mod summary;
mod user;
mod tag;
mod analytics;
mod settings;
0
backend-rust/src/models/analytics.rs
Normal file
0
backend-rust/src/models/analytics.rs
Normal file
0
backend-rust/src/models/article.rs
Normal file
0
backend-rust/src/models/article.rs
Normal file
0
backend-rust/src/models/settings.rs
Normal file
0
backend-rust/src/models/settings.rs
Normal file
0
backend-rust/src/models/summary.rs
Normal file
0
backend-rust/src/models/summary.rs
Normal file
0
backend-rust/src/models/tag.rs
Normal file
0
backend-rust/src/models/tag.rs
Normal file
0
backend-rust/src/models/user.rs
Normal file
0
backend-rust/src/models/user.rs
Normal file
7
backend-rust/src/services.rs
Normal file
7
backend-rust/src/services.rs
Normal file
@@ -0,0 +1,7 @@
mod summary_service;
mod news_service;
mod scraping_service;
mod tagging_service;
mod analytics_service;
mod sharing_service;
pub(crate) mod content_processor;
0
backend-rust/src/services/analytics_service.rs
Normal file
0
backend-rust/src/services/analytics_service.rs
Normal file
0
backend-rust/src/services/news_service.rs
Normal file
0
backend-rust/src/services/news_service.rs
Normal file
0
backend-rust/src/services/scraping_service.rs
Normal file
0
backend-rust/src/services/scraping_service.rs
Normal file
0
backend-rust/src/services/sharing_service.rs
Normal file
0
backend-rust/src/services/sharing_service.rs
Normal file
0
backend-rust/src/services/summary_service.rs
Normal file
0
backend-rust/src/services/summary_service.rs
Normal file
0
backend-rust/src/services/tagging_service.rs
Normal file
0
backend-rust/src/services/tagging_service.rs
Normal file
@@ -8,11 +8,11 @@ MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
LLM_MODEL = os.getenv("LLM_MODEL", "mistral-nemo:12b")
LLM_MODEL = os.getenv("LLM_MODEL", "gemma2:9b")
LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 10_000))
MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 40_000))

frontend_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
@@ -21,7 +21,7 @@ frontend_path = os.path.join(
)

logging.basicConfig(
    level=logging.WARNING,
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@@ -150,8 +150,6 @@ async def get_news(
            where_conditions.append("published BETWEEN ? AND ?")
            params.extend([from_ts, to_ts])

            logger.info(f"Date range: {from_date} to {to_date} (UTC timestamps: {from_ts} to {to_ts})")

        # Build the complete SQL query
        base_sql = """
            SELECT id, title, summary, url, published, country, created_at
@@ -163,27 +161,13 @@ async def get_news(
        else:
            sql = base_sql

        sql += " ORDER BY published DESC LIMIT 1000"

        # Log query info
        if all_countries and all_dates:
            logger.info("Querying ALL news articles (no filters)")
        elif all_countries:
            logger.info(f"Querying news from ALL countries with date filter")
        elif all_dates:
            logger.info(f"Querying ALL dates for countries: {country}")
        else:
            logger.info(f"Querying news: countries={country}, timezone={timezone_name}")

        logger.info(f"SQL: {sql}")
        logger.info(f"Parameters: {params}")
        sql += " ORDER BY published DESC"

        # Execute the query
        db.execute(sql, params)
        rows = db.fetchall()
        result = [dict(row) for row in rows]

        logger.info(f"Found {len(result)} news articles")
        return result

    except ValueError as e:
@@ -124,7 +124,6 @@ class NewsFetcher:

    @staticmethod
    def build_prompt(
            url: str,
            title: str = "",
            summary: str = "",
            content: str = "") -> str:
@@ -132,14 +131,13 @@ class NewsFetcher:
        Generate a prompt for the LLM to summarize an article.

        Args:
            url: Public URL of the article to summarize
            title: Article title from RSS feed (optional)
            summary: Article summary from RSS feed (optional)
            content: Extracted article content (optional)

        Returns:
            A formatted prompt string that instructs the LLM to generate
            a JSON response with title and summaries in German and English
            a JSON response with title, summary and tags in German
        """
        context_info = []
        if title:
@@ -155,21 +153,36 @@ class NewsFetcher:
            context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."

        return (
            "### Aufgabe\n"
            f"Du sollst eine Nachricht basierend auf der URL und den verfügbaren Informationen zusammenfassen.\n"
            f"URL: {url}\n"
            f"Verfügbare Informationen:\n{context}\n\n"
            "### Vorliegende Informationen\n"
            f"{context}\n\n"
            "### Längenbegrenzungen\n"
            "title: Format \"ORT: Titel\", max 100 Zeichen\n"
            "location: nur der ORT-Teil, max 40 Zeichen\n"
            "summary: 100–160 Wörter\n"
            "tags: bis zu 6 Schlüsselwörter, durch Komma getrennt, alles Kleinbuchstaben.\n\n"
            "### Regeln\n"
            "1. Nutze VORRANGIG den Artikel-Inhalt falls verfügbar, ergänze mit RSS-Informationen\n"
            "2. Falls kein Artikel-Inhalt verfügbar ist, nutze RSS-Titel und -Beschreibung\n"
            "3. Falls keine ausreichenden Informationen vorliegen, erstelle eine plausible Zusammenfassung basierend auf der URL\n"
            "4. Gib ausschließlich **gültiges minifiziertes JSON** zurück – kein Markdown, keine Kommentare\n"
            "5. Struktur: {\"title\":\"…\",\"summary\":\"…\"}\n"
            "6. title: Aussagekräftiger deutscher Titel (max 100 Zeichen)\n"
            "7. summary: Deutsche Zusammenfassung (zwischen 100 und 160 Wörter)\n"
            "8. Kein Text vor oder nach dem JSON\n\n"
            "1. Nutze ausschließlich Informationen, die im bereitgestellten Material eindeutig vorkommen. Externes Wissen ist untersagt.\n"
            "2. Liegt sowohl Artikel-Text als auch RSS-Metadaten vor, hat der Artikel-Text Vorrang; verwende RSS nur ergänzend.\n"
            "3. Liegt nur RSS-Titel und/oder -Beschreibung vor, stütze dich ausschließlich darauf.\n"
            "4. Sind die Informationen unzureichend, gib exakt {\"location\":\"\",\"title\":\"\",\"summary\":\"\",\"tags\":\"\"} zurück.\n"
            "5. Gib nur gültiges, minifiziertes JSON zurück – keine Zeilenumbrüche, kein Markdown, keine Kommentare.\n"
            "6. Verwende keine hypothetischen Formulierungen (\"könnte\", \"möglicherweise\" etc.).\n"
            "7. Wörtliche Zitate dürfen höchstens 15 % des Summary-Texts ausmachen.\n"
            "8. Kein Text vor oder nach dem JSON.\n\n"
            "### Ausgabe\n"
            "Jetzt antworte mit dem JSON:"
            "Antworte jetzt ausschließlich mit dem JSON:\n"
        )

    @staticmethod
    def build_system_prompt():
        return (
            "Du bist ein hochpräziser JSON-Summarizer und Experte für die Zusammenfassung von Artikeln.\n\n"
            "### Vorgehen\n"
            "Schritt 1: Identifiziere Hauptthema und Zweck.\n"
            "Schritt 2: Extrahiere die wichtigsten Fakten und Ergebnisse.\n"
            "Schritt 3: Erkenne die zentralen Argumente und Standpunkte.\n"
            "Schritt 4: Ordne die Informationen nach Wichtigkeit.\n"
            "Schritt 5: Erstelle eine prägnante, klare und sachliche Zusammenfassung.\n\n"
        )

    @staticmethod
@@ -193,26 +206,55 @@ class NewsFetcher:
            A dictionary containing the article title and summaries in German and English,
            or None if summarization failed
        """
        logger.debug("[AI] Fetching article content from: " + url)

        article_content = await NewsFetcher.fetch_article_content(client, url)

        if not article_content:
            logger.warning(
                f"⚠️ Could not fetch article content, using RSS data only")

        prompt = NewsFetcher.build_prompt(
            url, title, summary, article_content)
        prompt = NewsFetcher.build_prompt(title, summary, article_content)
        system_prompt = NewsFetcher.build_system_prompt()
        payload = {
            "model": LLM_MODEL,
            "prompt": prompt,
            "system": system_prompt,
            "stream": False,
            "temperature": 0.1,
            "format": "json",
            "format": {
                "type": "object",
                "properties": {
                    "title": {
                        "type": "string"
                    },
                    "location": {
                        "type": "string"
                    },
                    "summary": {
                        "type": "string"
                    },
                    "tags": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        }
                    }
                },
                "required": [
                    "title",
                    "summary",
                    "tags"
                ]
            },
            "options": {
                "num_gpu": 1,  # Force GPU usage
                "num_ctx": 128_000,  # Context size
                "num_ctx": 8192,  # Context size
            }
        }

        logger.debug("[AI] Running summary generation...")

        try:
            response = await client.post(
                f"{OLLAMA_HOST}/api/generate",
@@ -224,6 +266,8 @@ class NewsFetcher:
            result = response.json()
            llm_response = result["response"]

            logger.debug("[AI] " + llm_response)

            if isinstance(llm_response, str):
                summary_data = json.loads(llm_response)
            else:
@@ -388,8 +432,6 @@ class NewsFetcher:
                    summary=rss_summary
                )

                logger.info(summary)

                if not summary:
                    logger.warning(
                        f"❌ Failed to get summary for article {i}: {article_url}")
@@ -403,7 +445,8 @@ class NewsFetcher:
                cursor.execute(
                    """
                    INSERT
                    OR IGNORE INTO news
                    OR IGNORE
                    INTO news
                    (title, summary, url, published, country)
                    VALUES (?, ?, ?, ?, ?)
                    """,
@@ -11,10 +11,12 @@ MIN_CRON_HOURS=0.5
SYNC_COOLDOWN_MINUTES=30

# LLM model to use for summarization
LLM_MODEL=qwen2:7b-instruct-q4_K_M
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
LLM_MODEL=mistral-nemo:12b
LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S
LLM_MODEL=qwen2:7b-instruct-q4_K_M                # approx. 7-9GB (typically 8GB)
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0       # approx. 6-8GB (long context)
LLM_MODEL=mistral-nemo:12b                        # approx. 16-24+GB
LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S  # approx. 22GB
LLM_MODEL=yarn-mistral:7b-64k-q4_K_M              # approx. 11GB
LLM_MODEL=gemma2:9b                               # approx. 8GB

# Timeout in seconds for LLM requests
LLM_TIMEOUT_SECONDS=180
Binary file not shown.
@@ -1,8 +0,0 @@
import { defineConfig } from 'cypress'

export default defineConfig({
  e2e: {
    specPattern: 'cypress/e2e/**/*.{cy,spec}.{js,jsx,ts,tsx}',
    baseUrl: 'http://localhost:4173',
  },
})
@@ -1,5 +1,5 @@
{
  "name": "owly-news-summariser",
  "name": "owly-news",
  "version": "0.0.1",
  "private": true,
  "license": "PolyForm-Noncommercial-1.0.0",
@@ -14,9 +14,10 @@

    <!-- Articles Grid -->
    <div v-else class="grid gap-4 sm:gap-6 md:grid-cols-2 xl:grid-cols-3">
      <template v-for="article in news.articles"
                :key="article.id">
        <article
          v-for="article in news.articles"
          :key="article.id"
          v-if="isValidArticleContent(article)"
          class="flex flex-col h-full bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-200 dark:border-gray-700 hover:shadow-md dark:hover:shadow-lg dark:hover:shadow-gray-800/50 transition-all duration-200 overflow-hidden group"
        >
          <!-- Article Header -->
@@ -28,7 +29,7 @@
            </span>
            <time
              :datetime="new Date(article.published * 1000).toISOString()"
              :title="new Date(article.published * 1000).toLocaleString(userLocale.value, {
              :title="new Date(article.published * 1000).toLocaleString(userLocale, {
                dateStyle: 'full',
                timeStyle: 'long'
              })"
@@ -83,6 +84,7 @@
          </a>
        </div>
      </article>
      </template>
    </div>

    <!-- Loading State & Load More Trigger -->
@@ -129,6 +131,37 @@ const loadMoreArticles = async () => {
  }
};

interface Article {
  id: number;
  title: string;
  summary: string;
  url: string;
  published: number;
  country: string;
  created_at: number;
}

const INVALID_MARKERS = ['---', '...', '…', 'Title', 'Summary', 'Titel', 'Zusammenfassung'] as const;
const REQUIRED_TEXT_FIELDS = ['title', 'summary', 'url'] as const;

const isValidArticleContent = (article: Article): boolean => {
  const hasEmptyRequiredFields = REQUIRED_TEXT_FIELDS.some(
    field => article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].length === 0
  );

  if (hasEmptyRequiredFields) {
    return false;
  }

  const hasInvalidMarkers = REQUIRED_TEXT_FIELDS.some(field =>
    INVALID_MARKERS.some(marker =>
      article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].includes(marker)
    )
  );

  return !hasInvalidMarkers;
};

const observer = ref<IntersectionObserver | null>(null);
const loadMoreTrigger = ref<HTMLElement | null>(null);
@@ -6470,9 +6470,9 @@ __metadata:
  languageName: node
  linkType: hard

"owly-news-summariser@workspace:.":
"owly-news@workspace:.":
  version: 0.0.0-use.local
  resolution: "owly-news-summariser@workspace:."
  resolution: "owly-news@workspace:."
  dependencies:
    "@tailwindcss/vite": "npm:^4.1.11"
    "@tsconfig/node22": "npm:^22.0.2"