Compare commits: main...rewrite-ru (37 commits)

57a7b42b9d, d37daf02f6, 16167d18ff, 7c6724800f, af304266a4, 815e3b22fd,
e8e61faf61, c19813cbe2, cf163082b2, 011b256662, 0a97a57c76, 338b3ac7c1,
13fbac5009, 9b805e891a, 78073d27d7, c3b0c87bfa, 0aa8d9fa3a, cbbd0948e6,
3a5b0d8f4b, 0ce916c654, f853213d15, 300845c655, d90c618ee3, e7a97206a9,
c2adfa711d, b2d82892ef, 0f1632ad65, 7b114a6145, 4edb2b2179, aa520efb82,
f22259b863, a30f8467bc, 79e4d7f1de, 37ebf45d82, bc1735448a, 59b19a22ff,
86b5f83140
.gitignore (vendored, 3 changes)

```diff
@@ -34,10 +34,11 @@ build/
 logs/
 *.log
 
-# Database files
+# Database files (now includes the specific dev database)
 *.sqlite
 *.sqlite3
 *.db
+owlynews.sqlite3*
 
 # Dependency directories
 node_modules/
```
README.md (236 changes)

````diff
@@ -1,105 +1,67 @@
-# Owly News Summariser
+# Owly News
 
-Owly News Summariser is a web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.
+Owly News is a modern web application that fetches news articles from various RSS feeds, uses an LLM (Large Language Model) to summarize them, and presents them in a clean, user-friendly interface.
 
 ## Features
 
 - Fetches news from configurable RSS feeds
 - Automatically summarizes articles using Ollama LLM
-- Filters news by country
+- **AI-powered intelligent tagging** with geographic, category, and source tags
+- **Advanced multi-criteria filtering** with hierarchical tag support
 - Progressive Web App (PWA) support for offline access
 - Scheduled background updates
+- High-performance Rust backend for optimal resource usage
+- Modern Vue.js frontend with TypeScript support
+- **Comprehensive analytics** and reading statistics
+- **Flexible sharing system** with multiple format options
 
 ## Project Structure
 
-The project consists of two main components:
+The project consists of multiple components:
 
-- **Backend**: A FastAPI application that fetches and processes news feeds, summarizes articles, and provides API endpoints
-- **Frontend**: A Vue.js application that displays the news and provides a user interface for managing feeds
+- **Backend (Rust)**: Primary backend written in Rust using Axum framework for high performance (`backend-rust/`)
+- **Backend (Python)**: Legacy FastAPI backend (`backend/`)
+- **Frontend**: Modern Vue.js 3 application with TypeScript and Tailwind CSS (`frontend/`)
 
 ## Prerequisites
 
-- Python 3.8+ for the backend
-- Node.js 16+ and Yarn for the frontend
+### For Rust Backend (Recommended)
+- Rust 1.88.0+
+- [Ollama](https://ollama.ai/) for article summarization and tagging
+- SQLite (handled automatically by SQLx)
+
+### For Python Backend (Legacy)
+- Python 3.8+
 - [Ollama](https://ollama.ai/) for article summarization
 
-## Installing Yarn
-
-Yarn is a package manager for JavaScript that's required for the frontend. Here's how to install it:
-
-### Using npm (recommended)
-
-If you already have Node.js installed, the easiest way to install Yarn is via npm:
-
-```bash
-npm install -g yarn
-```
-
-### Platform-specific installations
-
-#### Windows
-
-- **Using Chocolatey**: `choco install yarn`
-- **Using Scoop**: `scoop install yarn`
-- **Manual installation**: Download and run the [installer](https://classic.yarnpkg.com/latest.msi)
-
-#### macOS
-
-- **Using Homebrew**: `brew install yarn`
-- **Using MacPorts**: `sudo port install yarn`
-
-#### Linux
-
-- **Debian/Ubuntu**:
-```bash
-curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
-echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
-sudo apt update && sudo apt install yarn
-```
-
-- **CentOS/Fedora/RHEL**:
-```bash
-curl --silent --location https://dl.yarnpkg.com/rpm/yarn.repo | sudo tee /etc/yum.repos.d/yarn.repo
-sudo yum install yarn
-```
-
-- **Arch Linux**: `pacman -S yarn`
-
-After installation, verify Yarn is installed correctly:
-
-```bash
-yarn --version
-```
+### For Frontend
+- Node.js 22+ and npm
+- Modern web browser with PWA support
 
 ## Setup
 
-### Backend Setup
+### Rust Backend Setup (Recommended)
 
-1. Navigate to the backend directory:
+1. Navigate to the Rust backend directory:
 ```bash
-cd backend
+cd backend-rust
 ```
 
-2. Create a virtual environment:
-```bash
-python -m venv venv
-source venv/bin/activate  # On Windows: venv\Scripts\activate
-```
-
-3. Install dependencies:
-```bash
-pip install -r requirements.txt
-```
-
-4. Create a `.env` file based on the example:
+2. Create a `.env` file based on the example:
 ```bash
 cp example.env .env
 ```
 
-5. Customize the `.env` file as needed:
-- `OLLAMA_HOST`: URL for the Ollama service (default: http://localhost:11434)
-- `CRON_HOURS`: Interval for scheduled news fetching (default: 1)
+3. Customize the `.env` file as needed:
 - `DATABASE_URL`: SQLite database connection string
+- `OLLAMA_BASE_URL`: URL for the Ollama service (default: http://localhost:11434)
+- Other configuration options as documented in the example file
+
+4. Run database migrations:
+```bash
+cargo install sqlx-cli
+sqlx migrate run
+```
 
 ### Frontend Setup
 
@@ -110,29 +72,24 @@ yarn --version
 
 2. Install dependencies:
 ```bash
-yarn
+npm install
 ```
 
 ## Running the Application
 
-### Running the Backend
+### Running the Rust Backend
 
-1. Navigate to the backend directory:
+1. Navigate to the Rust backend directory:
 ```bash
-cd backend
+cd backend-rust
 ```
 
-2. Activate the virtual environment:
+2. Start the backend server:
 ```bash
-source venv/bin/activate  # On Windows: venv\Scripts\activate
+cargo run
 ```
 
-3. Start the backend server:
-```bash
-uvicorn app.main:app --reload
-```
-
-The backend will be available at http://localhost:8000
+The backend will be available at http://localhost:3000
 
 ### Running the Frontend
 
@@ -143,22 +100,53 @@ yarn --version
 
 2. Start the development server:
 ```bash
-yarn dev:watch
+npm run dev
 ```
 
 The frontend will be available at http://localhost:5173
 
+## Key Features
+
+### Intelligent Content Organization
+- **AI-Powered Tagging**: Automatic classification with geographic, topical, and source tags
+- **Hierarchical Filtering**: Multi-level filtering by location (country → region → city), categories, and content types
+- **Smart Search**: Advanced filtering with suggestions based on tag relationships and usage patterns
+- **Legacy Migration**: Seamless upgrade from simple country-based filtering to comprehensive tag-based system
+
+### Advanced Analytics
+- **Reading Statistics**: Track reading time, completion rates, and engagement patterns
+- **Content Analytics**: Source performance, tag usage, and trending topics analysis
+- **Geographic Insights**: Location-based content distribution and reading preferences
+- **Goal Tracking**: Personal reading goals with progress monitoring
+
+### Flexible Article Display
+- **Compact View**: Title, excerpt, tags, and action buttons for quick browsing
+- **On-Demand Loading**: Full content, AI summaries, and source links as needed
+- **Visual Tag System**: Color-coded, hierarchical tags with click-to-filter functionality
+- **Reading Status**: Visual indicators for read/unread status and progress tracking
+
+### Enhanced Sharing
+- **Multiple Formats**: Text, Markdown, HTML, and JSON export options
+- **Custom Templates**: User-configurable sharing formats
+- **One-Click Operations**: Copy to clipboard with formatted content
+- **Privacy Controls**: Configurable information inclusion in shared content
+
 ## Building for Production
 
-### Building the Backend
+### Building the Rust Backend
 
-The backend can be deployed as a standard FastAPI application. You can use tools like Gunicorn with Uvicorn workers:
-
+1. Navigate to the Rust backend directory:
 ```bash
-pip install gunicorn
-gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker
+cd backend-rust
 ```
 
+2. Build the optimized release binary:
+```bash
+cargo build --release
+```
+
+The binary will be available at `target/release/owly-news`
+
 ### Building the Frontend
 
 1. Navigate to the frontend directory:
@@ -168,32 +156,62 @@ gunicorn app.main:app -w 4 -k uvicorn.workers.UvicornWorker
 
 2. Build the frontend:
 ```bash
-yarn build
+npm run build
```
 
 The built files will be in the `dist` directory and can be served by any static file server.
 
-## API Endpoints
+## Development
 
-The backend provides the following API endpoints:
+### Code Quality
 
-- `GET /news`: Get news articles with optional filtering
-- `GET /meta/last_sync`: Get the timestamp of the last feed synchronization
-- `POST /meta/cron`: Set the schedule for automatic feed synchronization
-- `GET /meta/feeds`: List all configured feeds
-- `POST /meta/feeds`: Add a new feed
-- `DELETE /meta/feeds`: Delete a feed
-- `GET /meta/model`: Check the status of the LLM model
-- `POST /meta/sync`: Manually trigger a feed synchronization
+The project includes comprehensive tooling for code quality:
 
-## Environment Variables
+**Frontend:**
+- ESLint with Vue and TypeScript support
+- Prettier for code formatting
+- Vitest for testing
+- TypeScript for type safety
+- Oxlint for additional linting
 
-### Backend
+**Backend (Rust):**
+- Standard Rust tooling (`cargo fmt`, `cargo clippy`)
+- SQLx for compile-time checked SQL queries
 
-- `OLLAMA_HOST`: URL for the Ollama service
-- `CRON_HOURS`: Interval for scheduled news fetching in hours
-- `DATABASE_URL`: SQLite database connection string
+### Testing
 
-## License
+Run frontend tests:
+```bash
+cd frontend
+npm run test
+```
 
-Code is licensed under [PolyForm Noncommercial 1.0.0](https://polyformproject.org/licenses/noncommercial/1.0.0/). For any commercial use, please get in touch.
+## Configuration
+
+The application uses a comprehensive configuration system via `config.toml`:
+
+- **AI Settings**: Configure Ollama integration for summaries and tagging
+- **Display Preferences**: Default views, themes, and UI customization
+- **Analytics**: Control data collection and retention policies
+- **Filtering**: Smart suggestions, saved filters, and geographic hierarchy
+- **Sharing**: Default formats and custom templates
+
+See the example configuration in the project for detailed options.
+
+## Migration from Legacy Systems
+
+The application includes automatic migration tools for upgrading from simpler filtering systems:
+
+- **Country Filter Migration**: Automatic conversion to hierarchical geographic tags
+- **Data Preservation**: Maintains historical data during migration
+- **Backward Compatibility**: Gradual transition with user control
+- **Validation Tools**: Ensure data integrity throughout the migration process
+
+## Future Roadmap
+
+The project is evolving through three phases:
+
+1. **Phase 1**: High-performance Rust backend with advanced filtering and analytics
+2. **Phase 2**: CLI application for power users and automation
+3. **Phase 3**: Migration to Dioxus for a full Rust stack
+
+See `ROADMAP.md` for detailed development plans and architectural decisions.
````
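The compare view does not include `example.env` itself, but a minimal `.env` along the lines the updated README describes might look like this; the values are illustrative (the database file name is guessed from the `owlynews.sqlite3*` pattern added to `.gitignore`), and the repository's `example.env` remains authoritative:

```bash
# Illustrative values only — example.env in the repo is the real template
DATABASE_URL=sqlite://owlynews.sqlite3
OLLAMA_BASE_URL=http://localhost:11434
```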
backend-rust/.gitignore (vendored, new file, 2 lines)

```
target/
/config.toml
```
backend-rust/Cargo.lock (generated, new file, 2520 lines) — file diff suppressed because it is too large.
backend-rust/Cargo.toml (new file, 37 lines)

```toml
[workspace]
members = [
    "crates/api",
    "crates/server",
    "crates/cli",
    "crates/db",
]
resolver = "3"

[workspace.package]
edition = "2024"
version = "0.1.0"
rust-version = "1.89"

[workspace.dependencies]
anyhow = "1.0.99"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.142"
tokio = { version = "1.47.1", features = ["rt-multi-thread", "macros", "signal"] }
libloading = "0.8.8"
tracing = "0.1.41"
once_cell = "1.21.3"
toml = "0.9.5"
axum = "0.8.4"
sha2 = "0.10.9"
sqlx = { version = "0.8.6", default-features = false, features = ["runtime-tokio-rustls", "macros", "postgres", "uuid", "chrono", "migrate"] }
hex = "0.4.3"
num_cpus = "1.17.0"
unicode-segmentation = "1.12.0"
readability = "0.3.0"
tracing-subscriber = { version = "0.3.19", features = ["env-filter", "fmt"] }
scraper = "0.23.1"
dotenv = "0.15.0"

# dev/test utilities in the workspace
tokio-test = "0.4.4"
axum-test = "17.3.0"
```
backend-rust/ROADMAP.md (new file, 584 lines)

# Owly News Summariser - Project Roadmap

This document outlines the strategic approach for transforming the project through three phases: Python-to-Rust backend migration, CLI application addition, and Vue-to-Dioxus frontend migration.

## Project Structure Strategy

### Current Phase: Axum API Setup
```
owly-news-summariser/
├── src/
│   ├── main.rs           # Entry point (will evolve)
│   ├── db.rs             # Database connection & SQLx setup
│   ├── api.rs            # API module declaration
│   ├── api/              # API-specific modules (no mod.rs needed)
│   │   ├── routes.rs     # Route definitions
│   │   ├── middleware.rs # Custom middleware
│   │   └── handlers.rs   # Request handlers & business logic
│   ├── models.rs         # Models module declaration
│   ├── models/           # Data models & database entities
│   │   ├── user.rs
│   │   ├── article.rs
│   │   ├── summary.rs
│   │   ├── tag.rs        # Tag models and relationships
│   │   ├── analytics.rs  # Analytics and statistics models
│   │   └── settings.rs   # User settings and preferences
│   ├── services.rs       # Services module declaration
│   ├── services/         # Business logic layer
│   │   ├── news_service.rs
│   │   ├── summary_service.rs
│   │   ├── scraping_service.rs   # Article content extraction
│   │   ├── tagging_service.rs    # AI-powered tagging
│   │   ├── analytics_service.rs  # Reading stats and analytics
│   │   └── sharing_service.rs    # Article sharing functionality
│   └── config.rs         # Configuration management
├── migrations/           # SQLx migrations (managed by SQLx CLI)
├── frontend/             # Keep existing Vue frontend for now
├── config.toml           # Configuration file with AI settings
└── Cargo.toml
```

### Phase 2: Multi-Binary Structure (API + CLI)
```
owly-news-summariser/
├── src/
│   ├── lib.rs            # Shared library code
│   ├── bin/
│   │   ├── server.rs     # API server binary
│   │   └── cli.rs        # CLI application binary
│   ├── [same module structure as Phase 1]
├── migrations/
├── frontend/
├── completions/          # Shell completion scripts
│   ├── owly.bash
│   ├── owly.zsh
│   └── owly.fish
├── config.toml
└── Cargo.toml            # Updated for multiple binaries
```

### Phase 3: Full Rust Stack
```
owly-news-summariser/
├── src/
│   ├── [same structure as Phase 2]
├── migrations/
├── frontend-dioxus/      # New Dioxus frontend
├── frontend/             # Legacy Vue (to be removed)
├── completions/
├── config.toml
└── Cargo.toml
```

## Core Features & Architecture

### Article Processing & Display Workflow
**Hybrid Approach: RSS Feeds + Manual Submissions with Smart Content Management**

1. **Article Collection**
   - RSS feed monitoring and batch processing
   - Manual article URL submission
   - Store original content and metadata in database

2. **Content Processing Pipeline**
   - Fetch RSS articles → scrape full content → store in DB
   - **Compact Article Display**:
     - Title (primary display)
     - RSS description text
     - Tags (visual indicators)
     - Time posted (from RSS)
     - Time added (when added to system)
     - Action buttons: [Full Article] [Summary] [Source]
   - **On-Demand Content Loading**:
     - Full Article: Display complete scraped content
     - Summary: Show AI-generated summary
     - Source: Open original URL in new tab
   - Background async processing with status updates
   - Support for re-processing without re-fetching

3. **Intelligent Tagging System**
   - **Automatic Tag Generation**: AI analyzes content and assigns relevant tags
   - **Geographic & Source Tags**: AI-generated location tags (countries, regions, cities) and publication source tags
   - **Content Category Tags**: Technology, Politics, Business, Sports, Health, etc.
   - **Visual Tag Display**: Color-coded tags in compact article view with hierarchical display
   - **Tag Filtering**: Quick filtering by clicking tags with smart suggestions
   - **Custom Tags**: User-defined tags and categories
   - **Tag Confidence**: Visual indicators for AI vs manual tags
   - **Tag Migration**: Automatic conversion of legacy country filters to geographic tags

4. **Analytics & Statistics System**
   - **Reading Analytics**:
     - Articles read vs added
     - Reading time tracking
     - Most read categories and tags
     - Reading patterns over time
   - **Content Analytics**:
     - Source reliability and quality metrics
     - Tag usage statistics
     - Processing success rates
     - Content freshness tracking
   - **Performance Metrics**:
     - AI processing times
     - Scraping success rates
     - User engagement patterns

5. **Advanced Filtering System**
   - **Multi-Criteria Filtering**:
     - By tags (single or multiple with AND/OR logic)
     - By geographic tags (country, region, city with hierarchical filtering)
     - By content categories and topics
     - By date ranges (posted, added, read)
     - By processing status (pending, completed, failed)
     - By content availability (scraped, summary, RSS-only)
     - By read/unread status
   - **Smart Filter Migration**: Automatic conversion of legacy country filters to tag-based equivalents
   - **Saved Filter Presets**:
     - Custom filter combinations
     - Quick access to frequent searches
     - Geographic preset templates (e.g., "European Tech News", "US Politics")
   - **Smart Suggestions**: Filter suggestions based on usage patterns and tag relationships

6. **Settings & Management System**
   - **User Preferences**:
     - Default article view mode
     - Tag display preferences with geographic hierarchy settings
     - Reading tracking settings
     - Notification preferences
   - **System Settings**:
     - AI configuration (via API and config file)
     - Processing settings
     - Display customization
     - Export preferences
   - **Content Management**:
     - Bulk operations (mark read, delete, retag)
     - Archive old articles
     - Export/import functionality
     - Legacy data migration tools

7. **Article Sharing System**
   - **Multiple Share Formats**:
     - Clean text format with title, summary, and source link
     - Markdown format for developers
     - Rich HTML format for email/web
     - JSON format for API integration
   - **Copy to Clipboard**: One-click formatted sharing
   - **Share Templates**: Customizable sharing formats
   - **Privacy Controls**: Control what information is included in shares

8. **Database Schema**
   ```
   Articles: id, title, url, source_type, rss_content, full_content,
             summary, processing_status, published_at, added_at, read_at,
             read_count, reading_time, ai_enabled, created_at, updated_at
   Tags: id, name, category, description, color, usage_count, parent_id, created_at
   ArticleTags: article_id, tag_id, confidence_score, ai_generated, created_at
   ReadingStats: user_id, article_id, read_at, reading_time, completion_rate
   FilterPresets: id, name, filter_criteria, user_id, created_at
   Settings: key, value, category, user_id, updated_at
   ShareTemplates: id, name, format, template_content, created_at
   LegacyMigration: old_filter_type, old_value, new_tag_ids, migrated_at
   ```
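To make the schema above concrete, here is a minimal sketch of how the `Articles` row could map onto a Rust struct with SQLx's `FromRow` derive (the workspace already enables the `macros` feature). The field names follow the schema; the concrete column types are assumptions, not repository code:

```rust
use sqlx::FromRow;

// Sketch of an Articles row; timestamps are assumed to be unix seconds.
#[derive(Debug, FromRow)]
pub struct Article {
    pub id: i64,
    pub title: String,
    pub url: String,
    pub source_type: String,       // e.g. "rss" or "manual"
    pub rss_content: Option<String>,
    pub full_content: Option<String>,
    pub summary: Option<String>,
    pub processing_status: String, // pending | completed | failed
    pub published_at: Option<i64>,
    pub added_at: i64,
    pub read_at: Option<i64>,
    pub read_count: i64,
    pub reading_time: Option<i64>,
    pub ai_enabled: bool,
    pub created_at: i64,
    pub updated_at: i64,
}
```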
## Step-by-Step Process

### Phase 1: Axum API Implementation

**Step 1: Core Infrastructure Setup**
- Set up database connection pooling with SQLx
- **Enhanced Configuration System**:
  - Extend config.toml with comprehensive settings
  - AI provider configurations with separate summary/tagging settings
  - Display preferences and UI customization
  - Analytics and tracking preferences
  - Sharing templates and formats
  - Filter and search settings
  - Geographic tagging preferences
- Establish error handling patterns with `anyhow`
- Set up logging and analytics infrastructure

**Step 2: Data Layer**
- Design comprehensive database schema with analytics and settings support
- Create SQLx migrations for all tables including analytics and user preferences
- Implement hierarchical tag system with geographic and content categories
- Add legacy migration support for country filters
- Implement article models with reading tracking and statistics
- Add settings and preferences data layer
- Create analytics data models and aggregation queries
- Implement sharing templates and format management
- Use SQLx's compile-time checked queries

**Step 3: Enhanced Services Layer**
- **Content Processing Services**:
  - RSS feed fetching and parsing
  - Web scraping with quality tracking
  - AI services for summary and tagging
- **Enhanced Tagging Service**:
  - Geographic location detection and tagging
  - Content category classification
  - Hierarchical tag relationships
  - Legacy filter migration logic
- **Analytics Service**:
  - Reading statistics collection and aggregation
  - Content performance metrics
  - User behavior tracking
  - Trend analysis and insights
- **Settings Management Service**:
  - User preference handling
  - System configuration management
  - Real-time settings updates
- **Sharing Service**:
  - Multiple format generation
  - Template processing
  - Privacy-aware content filtering
- **Advanced Filtering Service**:
  - Complex query building with geographic hierarchy
  - Filter preset management
  - Search optimization
  - Legacy filter migration

**Step 4: Comprehensive API Layer**
- **Article Management Routes**:
  - `GET /api/articles` - List articles with compact display data
  - `POST /api/articles` - Submit manual article URL
  - `GET /api/articles/:id` - Get basic article info
  - `GET /api/articles/:id/full` - Get complete scraped content
  - `GET /api/articles/:id/summary` - Get AI summary
  - `POST /api/articles/:id/read` - Mark as read and track reading time
  - `POST /api/articles/:id/share` - Generate shareable content
- **Analytics Routes**:
  - `GET /api/analytics/dashboard` - Main analytics dashboard data
  - `GET /api/analytics/reading-stats` - Personal reading statistics
  - `GET /api/analytics/content-stats` - Content and source analytics
  - `GET /api/analytics/trends` - Trending topics and patterns
  - `GET /api/analytics/export` - Export analytics data
- **Enhanced Filtering & Search Routes**:
  - `GET /api/filters/presets` - Get saved filter presets
  - `POST /api/filters/presets` - Save new filter preset
  - `GET /api/search/suggestions` - Get search and filter suggestions
  - `POST /api/search` - Advanced search with multiple criteria
  - `POST /api/filters/migrate` - Migrate legacy country filters to tags
- **Settings Routes**:
  - `GET /api/settings` - Get all user settings
  - `PUT /api/settings` - Update user settings
  - `GET /api/settings/system` - Get system configuration
  - `PUT /api/settings/system` - Update system settings (admin)
- **Enhanced Tag Management Routes**:
  - `GET /api/tags` - List tags with usage statistics and hierarchy
  - `GET /api/tags/geographic` - Get geographic tag hierarchy
  - `GET /api/tags/trending` - Get trending tags
  - `POST /api/tags/:id/follow` - Follow/unfollow tag for notifications
  - `GET /api/tags/categories` - Get tag categories and relationships
- **Sharing Routes**:
  - `GET /api/share/templates` - Get sharing templates
  - `POST /api/share/templates` - Create custom sharing template
  - `POST /api/articles/:id/share/:format` - Generate share content
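As a sketch of what one of these routes could look like in Axum: the handler body and function names below are hypothetical, not repository code, and note that the axum 0.8 pinned in the workspace uses `{id}` path captures rather than the `:id` notation written above:

```rust
use axum::{extract::{Path, State}, routing::get, Json, Router};
use serde_json::{json, Value};
use sqlx::SqlitePool;

// Hypothetical handler for GET /api/articles/{id}/summary.
async fn get_article_summary(
    State(pool): State<SqlitePool>,
    Path(id): Path<i64>,
) -> Json<Value> {
    // A real handler would query the summary for `id` from `pool`.
    let _ = &pool;
    Json(json!({ "id": id, "summary": null }))
}

pub fn summary_routes() -> Router<SqlitePool> {
    Router::new().route("/api/articles/{id}/summary", get(get_article_summary))
}
```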
**Step 5: Enhanced Frontend Features**
- **Compact Article Display**:
  - Card-based layout with title, RSS excerpt, tags, and timestamps
  - Action buttons for Full Article, Summary, and Source
  - Hierarchical tag display with geographic and category indicators
  - Reading status and progress indicators
- **Advanced Analytics Dashboard**:
  - Reading statistics with charts and trends
  - Content source performance metrics
  - Tag usage and trending topics with geographic breakdowns
  - Personal reading insights and goals
- **Comprehensive Filtering Interface**:
  - Multi-criteria filter builder with geographic hierarchy
  - Saved filter presets with quick access
  - Smart filter suggestions based on tag relationships
  - Visual filter indicators and clear actions
  - Legacy filter migration interface
- **Settings Management Panel**:
  - User preference configuration
  - AI and processing settings
  - Display and UI customization
  - Export/import functionality
- **Enhanced Sharing System**:
  - Quick share buttons with format selection
  - Copy-to-clipboard functionality
  - Custom sharing templates
  - Preview before sharing

**Step 6: Integration & Testing**
- Test all API endpoints with comprehensive coverage
- Test analytics collection and aggregation
- Test enhanced filtering and search functionality
- Test legacy filter migration
- Validate settings persistence and real-time updates
- Test sharing functionality across different formats
- Performance testing with large datasets and hierarchical tags
- Deploy and monitor

### Phase 2: CLI Application Addition

**Step 1: Restructure for Multiple Binaries**
- Move API code to `src/bin/server.rs`
- Create `src/bin/cli.rs` for CLI application
- Keep shared logic in `src/lib.rs`
- Update Cargo.toml to support multiple binaries

**Step 2: Enhanced CLI with Analytics and Management**
- **Core Commands**:
  - `owly list [--filters] [--format table|json|compact]` - List articles
  - `owly show <id> [--content|--summary]` - Display specific article
  - `owly read <id>` - Mark article as read and open in pager
  - `owly open <id>` - Open source URL in browser
- **Analytics Commands**:
  - `owly stats [--period day|week|month|year]` - Show reading statistics
  - `owly trends [--tags|--sources|--topics|--geo]` - Display trending content
  - `owly analytics export [--format csv|json]` - Export analytics data
- **Management Commands**:
  - `owly settings [--get key] [--set key=value]` - Manage settings
  - `owly filters [--list|--save name|--load name]` - Manage filter presets
  - `owly cleanup [--old|--unread|--failed]` - Clean up articles
  - `owly migrate [--from-country-filters]` - Migrate legacy data
- **Enhanced Filtering Commands**:
  - `owly filter [--tag] [--geo] [--category]` - Advanced filtering with geographic support
  - `owly tags [--list|--hierarchy|--geo]` - Tag management with geographic display
- **Sharing Commands**:
  - `owly share <id> [--format text|markdown|html]` - Generate share content
  - `owly export <id> [--template name] [--output file]` - Export article

**Step 3: Advanced CLI Features**
- Interactive filtering and search with geographic hierarchy
- Real-time analytics display with charts (using ASCII graphs)
- Bulk operations with progress indicators
- Settings management with validation
- Shell completion for all commands and parameters
- Legacy data migration tools

### Phase 3: Dioxus Frontend Migration

**Step 1: Component Architecture**
- **Core Display Components**:
  - `ArticleCard` - Compact article display with action buttons
  - `ArticleViewer` - Full article content display
  - `SummaryViewer` - AI summary display
  - `TagCloud` - Interactive tag display with geographic hierarchy
  - `GeographicMap` - Visual geographic filtering interface
- **Analytics Components**:
  - `AnalyticsDashboard` - Main analytics overview
  - `ReadingStats` - Personal reading statistics
  - `TrendChart` - Trending topics and patterns
  - `ContentMetrics` - Source and content analytics
  - `GeographicAnalytics` - Location-based content insights
- **Enhanced Filtering Components**:
  - `FilterBuilder` - Advanced filter creation interface with geographic support
  - `FilterPresets` - Saved filter management
  - `SearchBar` - Smart search with suggestions
  - `GeographicFilter` - Hierarchical location filtering
  - `MigrationTool` - Legacy filter migration interface
- **Settings Components**:
  - `SettingsPanel` - User preference management
  - `SystemConfig` - System-wide configuration
  - `ExportImport` - Data export/import functionality
- **Sharing Components**:
  - `ShareDialog` - Sharing interface with format options
  - `ShareTemplates` - Custom template management

**Step 2: Enhanced UX Features**
- **Smart Article Display**:
  - Lazy loading for performance
  - Infinite scroll with virtualization
  - Quick preview on hover
  - Keyboard navigation support
- **Advanced Analytics**:
  - Interactive charts and graphs with geographic data
  - Customizable dashboard widgets
  - Goal setting and progress tracking
  - Comparison and trend analysis
- **Intelligent Filtering**:
  - Auto-complete for filters with geographic suggestions
  - Visual filter builder with map integration
  - Filter combination suggestions based on tag relationships
  - Saved search notifications
- **Seamless Sharing**:
  - One-click sharing with clipboard integration
  - Live preview of shared content
  - Social media format optimization
  - Batch sharing capabilities

## Key Strategic Considerations

### 1. Performance & Scalability
- **Efficient Data Loading**: Lazy loading and pagination for large datasets
- **Optimized Queries**: Indexed database queries for filtering and analytics with hierarchical tag support
- **Caching Strategy**: Smart caching for frequently accessed content and tag hierarchies
- **Real-time Updates**: WebSocket integration for live analytics

### 2. User Experience Focus
- **Progressive Disclosure**: Show essential info first, details on demand
- **Responsive Design**: Optimized for mobile and desktop
- **Accessibility**: Full keyboard navigation and screen reader support
- **Customization**: User-configurable interface and behavior
- **Smooth Migration**: Seamless transition from country-based to tag-based filtering

### 3. Analytics & Insights
- **Privacy-First**: User control over data collection and retention
- **Actionable Insights**: Meaningful statistics that guide reading habits
- **Performance Metrics**: System health and efficiency tracking
- **Trend Analysis**: Pattern recognition for content and behavior with geographic context

### 4. Content Management
- **Flexible Display**: Multiple view modes for different use cases
- **Smart Organization**: AI-assisted content categorization with geographic awareness
- **Bulk Operations**: Efficient management of large article collections
- **Data Integrity**: Reliable content processing and error handling
- **Legacy Support**: Smooth migration from existing country-based filtering

## Enhanced Configuration File Structure

```toml
[server]
host = '127.0.0.1'
port = 8090

[display]
default_view = "compact" # compact, full, summary
articles_per_page = 50
show_reading_time = true
show_word_count = false
highlight_unread = true
theme = "auto" # light, dark, auto

[analytics]
enabled = true
track_reading_time = true
track_scroll_position = true
retention_days = 365 # How long to keep detailed analytics
aggregate_older_data = true

[filtering]
enable_smart_suggestions = true
max_recent_filters = 10
auto_save_filters = true
default_sort = "added_desc" # added_desc, published_desc, title_asc
enable_geographic_hierarchy = true
auto_migrate_country_filters = true

[sharing]
default_format = "text"
include_summary = true
include_tags = true
include_source = true
copy_to_clipboard = true

[sharing.templates.text]
format = """
📰 {title}

{summary}

🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}

Shared via Owly News Summariser
"""

[sharing.templates.markdown]
format = """
# {title}

{summary}

**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}

---
*Shared via Owly News Summariser*
"""

[ai]
enabled = true
provider = "ollama"
timeout_seconds = 120

[ai.summary]
enabled = true
temperature = 0.1
max_tokens = 1000

[ai.tagging]
enabled = true
temperature = 0.3
max_tokens = 200
max_tags_per_article = 10
min_confidence_threshold = 0.7
enable_geographic_tagging = true
enable_category_tagging = true
geographic_hierarchy_levels = 3 # country, region, city

[scraping]
timeout_seconds = 30
max_retries = 3
max_content_length = 50000
respect_robots_txt = true
rate_limit_delay_ms = 1000

[processing]
batch_size = 10
max_concurrent = 5
retry_attempts = 3
priority_manual = true
auto_mark_read_on_view = false

[migration]
auto_convert_country_filters = true
preserve_legacy_data = true
migration_batch_size = 100

[cli]
default_output = "table"
pager_command = "less"
show_progress = true
auto_confirm_bulk = false
show_geographic_hierarchy = true
```
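Loading a typed slice of this file is straightforward with the `serde` and `toml` crates already pinned in the workspace. A minimal sketch, modeling only the `[server]` and `[ai]` sections (the struct names are illustrative):

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ServerConfig {
    host: String,
    port: u16,
}

#[derive(Debug, Deserialize)]
struct AiConfig {
    enabled: bool,
    provider: String,
    timeout_seconds: u64,
}

#[derive(Debug, Deserialize)]
struct Config {
    server: ServerConfig,
    ai: AiConfig,
}

// Reads config.toml from disk and deserializes the modeled sections;
// unknown keys in the file are ignored by default.
fn load_config(path: &str) -> anyhow::Result<Config> {
    let raw = std::fs::read_to_string(path)?;
    Ok(toml::from_str(&raw)?)
}
```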
## Migration Strategy for Country-Based Filtering

### Automatic Migration Process
1. **Data Analysis**: Scan existing country filter data and RSS feed origins
2. **Tag Generation**: Create geographic tags for each country with hierarchical structure
3. **Filter Conversion**: Convert country-based filters to tag-based equivalents
4. **User Notification**: Inform users about the migration and new capabilities
5. **Gradual Rollout**: Maintain backward compatibility during transition period
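As an illustration of step 3, the conversion might reduce to a pure mapping from legacy country codes to hierarchical geographic tags; the tag naming scheme below is an assumption, not part of the roadmap:

```rust
// Hypothetical conversion of a legacy country filter into hierarchical tags.
fn country_filter_to_tags(country_code: &str) -> Vec<String> {
    match country_code {
        "DE" => vec!["geo/europe".into(), "geo/europe/germany".into()],
        "US" => vec!["geo/north-america".into(), "geo/north-america/usa".into()],
        // Unknown codes keep the legacy value so nothing is silently dropped.
        other => vec![format!("geo/unknown/{}", other.to_lowercase())],
    }
}
```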
### Enhanced Geographic Features
- **Hierarchical Display**: Country → Region → City tag hierarchy
- **Visual Map Integration**: Interactive geographic filtering via map interface
- **Smart Suggestions**: Related location and content suggestions
- **Multi-Level Filtering**: Filter by specific cities, regions, or broader geographic areas
- **Source Intelligence**: AI detection of article geographic relevance beyond RSS origin

## Future Enhancements (Post Phase 3)

### Advanced Analytics
- **Machine Learning Insights**: Content recommendation based on reading patterns and geographic preferences
- **Predictive Analytics**: Trending topic prediction with geographic context
- **Behavioral Analysis**: Reading habit optimization suggestions
- **Comparative Analytics**: Benchmark against reading goals and regional averages

### Enhanced Content Management
- **Smart Collections**: AI-curated article collections with geographic themes
- **Reading Lists**: Planned reading with progress tracking
- **Content Relationships**: Related article suggestions with geographic relevance
- **Advanced Search**: Full-text search with relevance scoring and geographic weighting

### Social & Collaboration Features
- **Reading Groups**: Shared reading lists and discussions with geographic focus
- **Social Sharing**: Integration with social platforms
- **Collaborative Tagging**: Community-driven content organization
- **Reading Challenges**: Gamification of reading habits with geographic themes

### Integration & Extensibility
- **Browser Extension**: Seamless article saving and reading
- **Mobile Apps**: Native iOS/Android applications with location awareness
- **API Ecosystem**: Third-party integrations and plugins
- **Webhook System**: Real-time notifications and integrations with geographic filtering
backend-rust/TODO.md (new file, 72 lines)

## CPU and resource limiting
- Tokio worker threads
  - Decide thread policy:
    - Option A: set TOKIO_WORKER_THREADS in the environment for deployments.
    - Option B: build a custom runtime with tokio::runtime::Builder::new_multi_thread().worker_threads(n).
  - Document your default policy (e.g., 50% of physical cores).

- Concurrency guard for CPU-heavy tasks
  - Create a global tokio::sync::Semaphore with N permits (N = allowed concurrent heavy tasks).
  - Acquire a permit before invoking heavy module operations; release automatically on drop.
  - Expose the semaphore in app state so handlers/jobs can share it.

- HTTP backpressure and rate limiting (if using API)
  - Add tower::limit::ConcurrencyLimitLayer to cap in-flight requests.
  - Add tower::limit::RateLimitLayer or request-size/timeouts as needed.
  - Optionally add tower::timeout::TimeoutLayer to bound handler latency.

- Stronger isolation (optional, later)
  - Evaluate running certain modules as separate processes for strict CPU caps.
  - Use cgroups v2 (Linux) or Job Objects (Windows) to bound CPU/memory per process.
  - Reuse the same JSON interface over IPC (e.g., stdio or a local socket).
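A minimal sketch of the concurrency guard described in the list above, using `tokio::sync::Semaphore`; the `CpuGuard` type and `run_heavy_task` name are illustrative, not existing project code:

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;

// Bounds how many CPU-heavy module invocations run at once.
#[derive(Clone)]
pub struct CpuGuard {
    permits: Arc<Semaphore>,
}

impl CpuGuard {
    pub fn new(max_concurrent: usize) -> Self {
        Self { permits: Arc::new(Semaphore::new(max_concurrent)) }
    }

    // Waits for a free permit, runs the work, and releases the permit
    // automatically when `_permit` is dropped at the end of the scope.
    pub async fn run_heavy_task<T>(&self, work: impl FnOnce() -> T) -> T {
        let _permit = self.permits.acquire().await.expect("semaphore closed");
        work()
    }
}
```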
## Build and run
- Build all crates
  - Run: cargo build --workspace

- Build each plugin as cdylib
  - Example: cd crates/modules/summarizer && cargo build --release

- Stage plugin libraries for the host to find
  - Create a modules directory the daemon will read, e.g. target/modules
  - Copy the built artifact into that directory:
    - Linux: copy target/release/libsummarizer.so -> target/modules/libsummarizer.so
    - macOS: copy target/release/libsummarizer.dylib -> target/modules/libsummarizer.dylib
    - Windows: copy target/release/summarizer.dll -> target/modules/summarizer.dll
  - Alternatively set OWLY_MODULES_DIR to your chosen directory.

- Run the daemon
  - cargo run -p owly-news
  - Optionally set:
    - OWLY_MODULES_DIR=/absolute/path/to/modules
    - TOKIO_WORKER_THREADS=N

## Wire into the API
- Share ModuleHost in app state
  - Create a struct AppState { host: Arc<ModuleHost>, cpu_sem: Arc<Semaphore>, ... }.
  - Add AppState to Axum with .with_state(state).

- In a handler (example: POST /summarize)
  - Parse payload as JSON.
  - Acquire a permit from cpu_sem before heavy work.
  - host.get("summarizer").await? to lazily load the module.
  - Call module.invoke_json("summarize", payload_value)?.
  - Map success to 200 with JSON; map errors to appropriate status codes.

- Error handling and observability
  - Use thiserror/anyhow to classify operational vs. client errors.
  - Add tracing spans around module loading and invocation; include module name and op.
  - Return structured error JSON when module reports an error.

- Configuration
  - Decide env vars and defaults: OWLY_MODULES_DIR, TOKIO_WORKER_THREADS, concurrency permits, rate limits.
  - Optionally add a config file (toml) and load via figment or config crate.

- Health and lifecycle
  - Add a /health route that checks:
    - Tokio is responsive.
    - Optional: preflight-check that required modules are present (or skip to keep lazy).

- Graceful shutdown: listen for SIGINT/SIGTERM and drain in-flight requests before exit.
backend-rust/crates/api/Cargo.lock (generated, new file, 7 lines)

```toml
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "api"
version = "0.1.0"
```
backend-rust/crates/api/Cargo.toml (new file, 18 lines)

```toml
[package]
name = "api"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
once_cell = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
async-trait = "0.1.89"
axum = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }

[features]
default = []
```
backend-rust/crates/api/src/api.rs (new file, 3 lines)

```rust
pub mod handlers;
pub mod middleware;
pub mod routes;
```
backend-rust/crates/api/src/api/handlers.rs (new file, 41 lines; imports consolidated at the top)

```rust
use axum::extract::State;
use axum::{
    http::StatusCode,
    response::{IntoResponse, Response},
    Json,
};
use serde_json::Value;
use sqlx::SqlitePool;

pub async fn get_articles(State(pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Article logic
    Ok(Json(serde_json::json!({"articles": []})))
}

pub async fn get_summaries(State(pool): State<SqlitePool>) -> Result<Json<Value>, AppError> {
    // TODO: Summaries logic
    Ok(Json(serde_json::json!({"summaries": []})))
}

// Wrapper that lets handlers use `?` on any anyhow-convertible error.
pub struct AppError(anyhow::Error);

impl IntoResponse for AppError {
    fn into_response(self) -> Response {
        let (status, message) = match self.0.downcast_ref::<sqlx::Error>() {
            Some(_) => (StatusCode::INTERNAL_SERVER_ERROR, "Database error occurred"),
            None => (StatusCode::INTERNAL_SERVER_ERROR, "An error occurred"),
        };

        tracing::error!("API Error: {:?}", self.0);
        (status, message).into_response()
    }
}

impl<E> From<E> for AppError
where
    E: Into<anyhow::Error>,
{
    fn from(err: E) -> Self {
        Self(err.into())
    }
}
```
backend-rust/crates/api/src/api/middleware.rs (new file, empty)
backend-rust/crates/api/src/api/routes.rs (new file, 11 lines)

```rust
use axum::Router;
use axum::routing::get;
use sqlx::SqlitePool;

use crate::api::handlers;

pub fn routes() -> Router<SqlitePool> {
    Router::new()
        .route("/articles", get(handlers::get_articles))
        .route("/summaries", get(handlers::get_summaries))
    // Add more routes as needed
}
```
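One plausible way to mount this router on an application with a `SqlitePool` as shared state; this wiring is an assumption about the server crate, not code from this diff:

```rust
use axum::Router;
use sqlx::SqlitePool;

// Nest the API router under /api and supply the pool as shared state,
// which satisfies the Router<SqlitePool> type that routes() returns.
fn build_app(pool: SqlitePool) -> Router {
    Router::new()
        .nest("/api", api::api::routes::routes())
        .with_state(pool)
}
```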
backend-rust/crates/api/src/config.rs (new file, 1003 lines) — file diff suppressed because it is too large.
backend-rust/crates/api/src/lib.rs (new file, 6 lines)

```rust
//! API-first core: shared types, DTOs, service traits, configuration.

pub mod config;
pub mod types;
pub mod services;
pub mod api;
```
backend-rust/crates/api/src/services.rs (new file, 28 lines)

```rust
use crate::types::Health;
use async_trait::async_trait;

// Submodules that host various domain services. These were refactored from the
// legacy root src folder into this workspace crate. Each component is its own module file.
pub mod summary_service;
pub mod news_service;
pub mod scraping_service;
pub mod tagging_service;
pub mod analytics_service;
pub mod sharing_service;
pub(crate) mod content_processor;

// Implement your service traits here. Example:
#[async_trait]
pub trait HealthService: Send + Sync {
    async fn health(&self) -> Health;
}

// A trivial default implementation that can be used by server and tests.
pub struct DefaultHealthService;

#[async_trait]
impl HealthService for DefaultHealthService {
    async fn health(&self) -> Health {
        Health { status: "ok".into() }
    }
}
```
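A hypothetical wiring of `HealthService` into the `/health` route that the TODO list mentions; none of this handler code exists in the diff:

```rust
use std::sync::Arc;

use api::services::{DefaultHealthService, HealthService};
use api::types::Health;
use axum::{extract::State, routing::get, Json, Router};

// Returns the service's health report as JSON.
async fn health(State(svc): State<Arc<dyn HealthService>>) -> Json<Health> {
    Json(svc.health().await)
}

fn health_router() -> Router {
    let svc: Arc<dyn HealthService> = Arc::new(DefaultHealthService);
    Router::new().route("/health", get(health)).with_state(svc)
}
```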
backend-rust/crates/api/src/services/analytics_service.rs (new file, 4 lines; the file header was lost in the source view, but the module declaration in services.rs identifies it)

```rust
//! Analytics service module.
//! Implement logic for tracking and aggregating analytics here.

// Placeholder for analytics-related types and functions.
```
backend-rust/crates/api/src/services/content_processor.rs (new file, 3 lines; header likewise inferred from services.rs)

```rust
//! Content processor utilities shared by services.

// Placeholder module for content processing helpers (e.g., cleaning, tokenization).
```
backend-rust/crates/api/src/services/news_service.rs (new file, 4 lines)

```rust
//! News service module.
//! Implement logic related to news retrieval/management here.

// Placeholder for news-related types and functions.
```
backend-rust/crates/api/src/services/scraping_service.rs (new file, 4 lines)

```rust
//! Scraping service module.
//! Implement logic related to web scraping, fetchers, and extractors here.

// Placeholder for scraping-related types and functions.
```
backend-rust/crates/api/src/services/sharing_service.rs (new file, 4 lines)

```rust
//! Sharing service module.
//! Implement logic related to content sharing here.

// Placeholder for sharing-related types and functions.
```
backend-rust/crates/api/src/services/summary_service.rs (new file, 4 lines)

```rust
//! Summary service module.
//! Implement logic for generating summaries from articles here.

// Placeholder for summary-related types and functions.
```
backend-rust/crates/api/src/services/tagging_service.rs (new file, 4 lines)

```rust
//! Tagging service module.
//! Implement logic related to tagging articles and managing tags here.

// Placeholder for tagging-related types and functions.
```
backend-rust/crates/api/src/types.rs (new file, 6 lines)

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Health {
    pub status: String,
}
```
backend-rust/crates/cli/Cargo.toml (new file, 15 lines)

```toml
[package]
name = "cli"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
dotenv = { workspace = true }

api = { path = "../api" }
server = { path = "../server" }
```
70
backend-rust/crates/cli/src/main.rs
Normal file
@@ -0,0 +1,70 @@
use anyhow::Result;
use api::config::Cli;
use dotenv::dotenv;
use std::{env, net::SocketAddr, str::FromStr};
use tokio::signal;

#[tokio::main]
async fn main() -> Result<()> {
    dotenv().ok();
    let args: Vec<String> = env::args().collect();

    match args.get(1).map(|s| s.as_str()) {
        Some("serve") => serve(args).await,
        Some("print-config") => print_config(),
        _ => {
            print_help();
            Ok(())
        }
    }
}

fn print_help() {
    eprintln!(
        "Usage:
  cli serve [--addr 0.0.0.0:8080]
  cli print-config

Environment:
  These may influence runtime behavior.

Notes:
  - 'serve' runs the HTTP server.
  - 'print-config' prints the default CLI configuration in JSON."
    );
}

async fn serve(args: Vec<String>) -> Result<()> {
    // Naive flag parse: look for "--addr host:port".
    let mut addr: SocketAddr = SocketAddr::from_str("127.0.0.1:8080")?;
    let mut i = 2;
    while i + 1 < args.len() {
        if args[i] == "--addr" {
            addr = SocketAddr::from_str(&args[i + 1])?;
            i += 2;
        } else {
            i += 1;
        }
    }

    let server_task = tokio::spawn(async move { server::start_server(addr).await });

    // Graceful shutdown via Ctrl+C.
    tokio::select! {
        res = server_task => {
            res??;
        }
        _ = signal::ctrl_c() => {
            eprintln!("Shutting down...");
        }
    }

    Ok(())
}

fn print_config() -> Result<()> {
    let cfg = Cli::default();
    let json = serde_json::to_string_pretty(&cfg)?;
    println!("{json}");
    Ok(())
}
10
backend-rust/crates/db/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
[package]
name = "db"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }
tracing = { workspace = true }
api = { path = "../api" }
44
backend-rust/crates/db/src/lib.rs
Normal file
@@ -0,0 +1,44 @@
use api::config::AppSettings;
use anyhow::{Context, Result};
use sqlx::migrate::Migrator;
use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};
use sqlx::{Pool, Sqlite, SqlitePool};
use std::str::FromStr;
use std::time::Duration;
use tracing::info;

// Embed migrations from the workspace-level migrations directory.
// crates/db is two levels below backend-rust, where migrations/ resides.
pub const MIGRATOR: Migrator = sqlx::migrate!("../../migrations");

pub async fn initialize_db(app_settings: &AppSettings) -> Result<Pool<Sqlite>> {
    app_settings
        .ensure_default_directory()
        .context("Failed to ensure default directory for database")?;

    let options = SqliteConnectOptions::from_str(&app_settings.database_url())?
        .create_if_missing(true)
        .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal)
        .foreign_keys(true);

    let pool = SqlitePoolOptions::new()
        .max_connections(20)
        .min_connections(5)
        .acquire_timeout(Duration::from_secs(30))
        .idle_timeout(Duration::from_secs(600))
        .connect_with(options)
        .await?;

    MIGRATOR
        .run(&pool)
        .await
        .with_context(|| "Database migrations failed")?;
    info!("Database migrations completed successfully");

    Ok(pool)
}

pub async fn create_pool(opts: SqliteConnectOptions) -> Result<SqlitePool> {
    let pool = SqlitePool::connect_with(opts).await?;
    Ok(pool)
}
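Once initialized, the pool is an ordinary sqlx Pool<Sqlite> that can be handed to any query. A minimal usage sketch against the news table from migration 001 (the helper function is an illustrative assumption):

use anyhow::Result;
use sqlx::{Pool, Sqlite};

// Count stored articles in the legacy `news` table (see migration 001).
pub async fn count_news(pool: &Pool<Sqlite>) -> Result<i64> {
    let count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM news")
        .fetch_one(pool)
        .await?;
    Ok(count)
}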
23
backend-rust/crates/server/Cargo.toml
Normal file
@@ -0,0 +1,23 @@
[package]
name = "server"
version = "0.1.0"
edition = "2024"

[dependencies]
anyhow = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
axum = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sqlx = { workspace = true, features = ["sqlite"] }
dotenv = { workspace = true }
once_cell = { workspace = true }

api = { path = "../api" }
db = { path = "../db" }
http = "1.3.1"

[features]
default = []
63
backend-rust/crates/server/src/lib.rs
Normal file
@@ -0,0 +1,63 @@
use axum::{routing::get, Json, Router};
use std::{net::SocketAddr, sync::Arc};
use tokio::net::TcpListener;
use tracing::{info, level_filters::LevelFilter};
use tracing_subscriber::EnvFilter;

use api::services::{DefaultHealthService, HealthService};
use api::types::Health;
use api::config::AppSettings;

pub struct AppState {
    pub health_service: Arc<dyn HealthService>,
}

pub async fn build_router(state: Arc<AppState>) -> Router {
    Router::new().route(
        "/health",
        get({
            let state = state.clone();
            move || health_handler(state.clone())
        }),
    )
}

async fn health_handler(state: Arc<AppState>) -> Json<Health> {
    let res = state.health_service.health().await;
    Json(res)
}

pub async fn start_server(addr: SocketAddr) -> anyhow::Result<()> {
    init_tracing();

    // Load application settings and initialize the database pool (SQLite).
    let app_settings = AppSettings::get_app_settings();
    let pool = db::initialize_db(&app_settings).await?;

    let state = Arc::new(AppState {
        health_service: Arc::new(DefaultHealthService),
    });

    // Base daemon router
    let app = build_router(state)
        .await
        // Attach API under /api and provide DB state
        .nest("/api", api::api::routes::routes().with_state(pool.clone()));

    let listener = TcpListener::bind(addr).await?;
    info!("HTTP server listening on http://{}", addr);
    axum::serve(listener, app).await?;
    Ok(())
}

fn init_tracing() {
    let env_filter = EnvFilter::try_from_default_env()
        .or_else(|_| EnvFilter::try_new("info"))
        .unwrap()
        .add_directive(LevelFilter::INFO.into());

    tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_target(true)
        .compact()
        .init();
}
22
backend-rust/crates/server/tests/health.rs
Normal file
@@ -0,0 +1,22 @@
use axum::Router;
use server::{build_router, AppState};
use api::services::DefaultHealthService;
use std::sync::Arc;
use tower::ServiceExt; // for `oneshot`; assumes tower is available as a dev-dependency

#[tokio::test]
async fn health_ok() {
    let state = Arc::new(AppState {
        health_service: Arc::new(DefaultHealthService),
    });

    let app: Router = build_router(state).await;

    let req = http::Request::builder()
        .uri("/health")
        .body(axum::body::Body::empty())
        .unwrap();

    // Drive the router directly, without binding a socket.
    let res = app.oneshot(req).await.unwrap();
    assert_eq!(res.status(), http::StatusCode::OK);
}
31
backend-rust/example.env
Normal file
@@ -0,0 +1,31 @@
# URL for the Ollama service
OLLAMA_HOST=http://localhost:11434

# Interval for scheduled news fetching in hours
CRON_HOURS=1

# Minimum interval for scheduled news fetching in hours
MIN_CRON_HOURS=0.5

# Cooldown period in minutes between manual syncs
SYNC_COOLDOWN_MINUTES=30

# LLM model to use for summarization
LLM_MODEL=qwen2:7b-instruct-q4_K_M
LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
LLM_MODEL=mistral-nemo:12b

# Timeout in seconds for LLM requests
LLM_TIMEOUT_SECONDS=180

# Timeout in seconds for Ollama API requests
OLLAMA_API_TIMEOUT_SECONDS=10

# Timeout in seconds for article fetching
ARTICLE_FETCH_TIMEOUT=30

# Maximum length of article content to process
MAX_ARTICLE_LENGTH=5000

# SQLite database file name
DB_NAME=owlynews.sqlite3
5
backend-rust/migrations/001_initial_schema.down.sql
Normal file
@@ -0,0 +1,5 @@
DROP TABLE IF EXISTS meta;
DROP TABLE IF EXISTS settings;
DROP TABLE IF EXISTS feeds;
DROP INDEX IF EXISTS idx_news_published;
DROP TABLE IF EXISTS news;
38
backend-rust/migrations/001_initial_schema.up.sql
Normal file
@@ -0,0 +1,38 @@
-- Initial database schema for Owly News Summariser

-- News table to store articles
CREATE TABLE IF NOT EXISTS news
(
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    title      TEXT NOT NULL,
    summary    TEXT,
    url        TEXT NOT NULL,
    published  TIMESTAMP NOT NULL,
    country    TEXT NOT NULL,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);

-- Index for faster queries on published date
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);

-- Feeds table to store RSS feed sources
CREATE TABLE IF NOT EXISTS feeds
(
    id      INTEGER PRIMARY KEY,
    country TEXT,
    url     TEXT UNIQUE NOT NULL
);

-- Settings table for application configuration
CREATE TABLE IF NOT EXISTS settings
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);

-- Meta table for application metadata
CREATE TABLE IF NOT EXISTS meta
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);
18
backend-rust/migrations/002_add_category_to_news.down.sql
Normal file
@@ -0,0 +1,18 @@
-- Note: SQLite doesn't support DROP COLUMN, so we recreate the table without `category`
CREATE TABLE news_backup
(
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    title      TEXT NOT NULL,
    summary    TEXT,
    url        TEXT NOT NULL,
    published  TIMESTAMP NOT NULL,
    country    TEXT NOT NULL,
    created_at INTEGER DEFAULT (strftime('%s', 'now'))
);

INSERT INTO news_backup
SELECT id, title, summary, url, published, country, created_at
FROM news;
DROP TABLE news;
ALTER TABLE news_backup
    RENAME TO news;
CREATE INDEX IF NOT EXISTS idx_news_published ON news (published);
3
backend-rust/migrations/002_add_category_to_news.up.sql
Normal file
@@ -0,0 +1,3 @@
-- Add category field to news table
ALTER TABLE news
    ADD COLUMN category TEXT;
7
backend-rust/migrations/003_create_articles_table.down.sql
Normal file
@@ -0,0 +1,7 @@
-- Drop articles table and its indexes
DROP INDEX IF EXISTS idx_articles_read_at;
DROP INDEX IF EXISTS idx_articles_source_type;
DROP INDEX IF EXISTS idx_articles_processing_status;
DROP INDEX IF EXISTS idx_articles_added_at;
DROP INDEX IF EXISTS idx_articles_published_at;
DROP TABLE IF EXISTS articles;
27
backend-rust/migrations/003_create_articles_table.up.sql
Normal file
@@ -0,0 +1,27 @@
-- Create enhanced articles table to replace news table structure
CREATE TABLE IF NOT EXISTS articles
(
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    title             TEXT NOT NULL,
    url               TEXT NOT NULL,
    source_type       TEXT NOT NULL DEFAULT 'rss', -- 'rss', 'manual'
    rss_content       TEXT, -- RSS description/excerpt
    full_content      TEXT, -- Scraped full content
    summary           TEXT, -- AI-generated summary
    processing_status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'processing', 'completed', 'failed'
    published_at      TIMESTAMP NOT NULL,
    added_at          TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    read_at           TIMESTAMP,
    read_count        INTEGER NOT NULL DEFAULT 0,
    reading_time      INTEGER, -- in seconds
    ai_enabled        BOOLEAN NOT NULL DEFAULT 1,
    created_at        TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    updated_at        TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create indexes for performance
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles (published_at);
CREATE INDEX IF NOT EXISTS idx_articles_added_at ON articles (added_at);
CREATE INDEX IF NOT EXISTS idx_articles_processing_status ON articles (processing_status);
CREATE INDEX IF NOT EXISTS idx_articles_source_type ON articles (source_type);
CREATE INDEX IF NOT EXISTS idx_articles_read_at ON articles (read_at);
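On the Rust side, a row of this table could be mapped with sqlx's FromRow derive. A sketch, assuming sqlx's derive feature is enabled and that the datetime('now') timestamps are read back as TEXT; the struct is illustrative, not part of this change set:

use sqlx::FromRow;

// Mirrors the `articles` columns above.
#[derive(Debug, FromRow)]
pub struct ArticleRow {
    pub id: i64,
    pub title: String,
    pub url: String,
    pub source_type: String,       // 'rss' or 'manual'
    pub rss_content: Option<String>,
    pub full_content: Option<String>,
    pub summary: Option<String>,
    pub processing_status: String, // 'pending', 'processing', 'completed', 'failed'
    pub published_at: String,      // stored as TEXT via datetime('now')
    pub added_at: String,
    pub read_at: Option<String>,
    pub read_count: i64,
    pub reading_time: Option<i64>, // seconds
    pub ai_enabled: bool,
    pub created_at: String,
    pub updated_at: String,
}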
9
backend-rust/migrations/004_create_tags_table.down.sql
Normal file
@@ -0,0 +1,9 @@
-- Drop tag system tables and indexes
DROP INDEX IF EXISTS idx_article_tags_ai_generated;
DROP INDEX IF EXISTS idx_article_tags_tag_id;
DROP INDEX IF EXISTS idx_article_tags_article_id;
DROP INDEX IF EXISTS idx_tags_usage_count;
DROP INDEX IF EXISTS idx_tags_parent_id;
DROP INDEX IF EXISTS idx_tags_category;
DROP TABLE IF EXISTS article_tags;
DROP TABLE IF EXISTS tags;
31
backend-rust/migrations/004_create_tags_table.up.sql
Normal file
@@ -0,0 +1,31 @@
-- Create tags table with hierarchical support
CREATE TABLE IF NOT EXISTS tags
(
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    name        TEXT NOT NULL UNIQUE,
    category    TEXT NOT NULL, -- 'geographic', 'content', 'source', 'custom'
    description TEXT,
    color       TEXT, -- Hex color for UI display
    usage_count INTEGER NOT NULL DEFAULT 0,
    parent_id   INTEGER REFERENCES tags (id), -- For hierarchical tags (e.g., Country -> Region -> City)
    created_at  TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create article_tags junction table
CREATE TABLE IF NOT EXISTS article_tags
(
    article_id       INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
    tag_id           INTEGER NOT NULL REFERENCES tags (id) ON DELETE CASCADE,
    confidence_score REAL DEFAULT 1.0, -- AI confidence (0.0-1.0)
    ai_generated     BOOLEAN NOT NULL DEFAULT 0,
    created_at       TIMESTAMP NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (article_id, tag_id)
);

-- Create indexes
CREATE INDEX IF NOT EXISTS idx_tags_category ON tags (category);
CREATE INDEX IF NOT EXISTS idx_tags_parent_id ON tags (parent_id);
CREATE INDEX IF NOT EXISTS idx_tags_usage_count ON tags (usage_count DESC);
CREATE INDEX IF NOT EXISTS idx_article_tags_article_id ON article_tags (article_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_tag_id ON article_tags (tag_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_ai_generated ON article_tags (ai_generated);
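The junction table is what enables the multi-criteria tag filtering: a tag lookup joined against article_tags yields article ids, optionally gated on AI confidence. A query sketch from the Rust side (the function name is an illustrative assumption):

use anyhow::Result;
use sqlx::{Pool, Sqlite};

// Article ids carrying a given tag at or above a confidence threshold.
pub async fn article_ids_for_tag(
    pool: &Pool<Sqlite>,
    tag_name: &str,
    min_confidence: f64,
) -> Result<Vec<i64>> {
    let ids = sqlx::query_scalar(
        "SELECT at.article_id
         FROM article_tags at
         JOIN tags t ON t.id = at.tag_id
         WHERE t.name = ?1
           AND at.confidence_score >= ?2",
    )
    .bind(tag_name)
    .bind(min_confidence)
    .fetch_all(pool)
    .await?;
    Ok(ids)
}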
11
backend-rust/migrations/005_create_statistics_table.down.sql
Normal file
@@ -0,0 +1,11 @@
-- Drop analytics system tables and indexes
DROP INDEX IF EXISTS idx_legacy_migration_old_filter_type;
DROP INDEX IF EXISTS idx_share_templates_format;
DROP INDEX IF EXISTS idx_filter_presets_user_id;
DROP INDEX IF EXISTS idx_reading_stats_read_at;
DROP INDEX IF EXISTS idx_reading_stats_article_id;
DROP INDEX IF EXISTS idx_reading_stats_user_id;
DROP TABLE IF EXISTS legacy_migration;
DROP TABLE IF EXISTS share_templates;
DROP TABLE IF EXISTS filter_presets;
DROP TABLE IF EXISTS reading_stats;
50
backend-rust/migrations/005_create_statistics_table.up.sql
Normal file
@@ -0,0 +1,50 @@
-- Create reading statistics table
CREATE TABLE IF NOT EXISTS reading_stats
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id         INTEGER DEFAULT 1, -- For future multi-user support
    article_id      INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
    read_at         TIMESTAMP NOT NULL,
    reading_time    INTEGER, -- in seconds
    completion_rate REAL DEFAULT 1.0, -- 0.0-1.0, how much of the article was read
    created_at      TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create filter presets table
CREATE TABLE IF NOT EXISTS filter_presets
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    name            TEXT NOT NULL,
    filter_criteria TEXT NOT NULL, -- JSON string of filter parameters
    user_id         INTEGER DEFAULT 1, -- For future multi-user support
    created_at      TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create share templates table
CREATE TABLE IF NOT EXISTS share_templates
(
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    name             TEXT NOT NULL,
    format           TEXT NOT NULL, -- 'text', 'markdown', 'html', 'json'
    template_content TEXT NOT NULL,
    is_default       BOOLEAN NOT NULL DEFAULT 0,
    created_at       TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create legacy migration tracking table
CREATE TABLE IF NOT EXISTS legacy_migration
(
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    old_filter_type TEXT NOT NULL, -- 'country', 'category', etc.
    old_value       TEXT NOT NULL,
    new_tag_ids     TEXT, -- JSON array of tag IDs
    migrated_at     TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);

-- Create indexes
CREATE INDEX IF NOT EXISTS idx_reading_stats_user_id ON reading_stats (user_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_article_id ON reading_stats (article_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_read_at ON reading_stats (read_at);
CREATE INDEX IF NOT EXISTS idx_filter_presets_user_id ON filter_presets (user_id);
CREATE INDEX IF NOT EXISTS idx_share_templates_format ON share_templates (format);
CREATE INDEX IF NOT EXISTS idx_legacy_migration_old_filter_type ON legacy_migration (old_filter_type);
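The reading_stats table is the raw material for the reading statistics feature. A sketch of one aggregation an analytics service might run over it, per-day reading totals; the function is an illustrative assumption:

use anyhow::Result;
use sqlx::{Pool, Sqlite};

// Total reading time in seconds per calendar day, newest first.
pub async fn reading_time_per_day(pool: &Pool<Sqlite>) -> Result<Vec<(String, i64)>> {
    let rows = sqlx::query_as(
        "SELECT date(read_at) AS day,
                COALESCE(SUM(reading_time), 0)
         FROM reading_stats
         GROUP BY day
         ORDER BY day DESC",
    )
    .fetch_all(pool)
    .await?;
    Ok(rows)
}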
18
backend-rust/migrations/006_update_settings_table.down.sql
Normal file
@@ -0,0 +1,18 @@
-- Remove enhanced settings columns and indexes
DROP INDEX IF EXISTS idx_settings_user_id;
DROP INDEX IF EXISTS idx_settings_category;

-- Note: SQLite doesn't support DROP COLUMN, so we recreate the table
CREATE TABLE settings_backup AS
SELECT key, val
FROM settings;
DROP TABLE settings;
CREATE TABLE settings
(
    key TEXT PRIMARY KEY,
    val TEXT NOT NULL
);
INSERT INTO settings
SELECT key, val
FROM settings_backup;
DROP TABLE settings_backup;
74
backend-rust/migrations/006_update_settings_table.up.sql
Normal file
@@ -0,0 +1,74 @@
-- Enhance settings table to support more structured configuration
ALTER TABLE settings
    ADD COLUMN category TEXT DEFAULT 'general';
ALTER TABLE settings
    ADD COLUMN user_id INTEGER DEFAULT 1;
ALTER TABLE settings
    ADD COLUMN updated_at TIMESTAMP DEFAULT (datetime('now'));

-- Create indexes for better performance
CREATE INDEX IF NOT EXISTS idx_settings_category ON settings (category);
CREATE INDEX IF NOT EXISTS idx_settings_user_id ON settings (user_id);

-- Insert default settings based on roadmap configuration
INSERT OR IGNORE INTO settings (key, val, category)
VALUES
    -- Display settings
    ('default_view', 'compact', 'display'),
    ('articles_per_page', '50', 'display'),
    ('show_reading_time', '1', 'display'),
    ('show_word_count', '0', 'display'),
    ('highlight_unread', '1', 'display'),
    ('theme', 'auto', 'display'),

    -- Analytics settings
    ('analytics_enabled', '1', 'analytics'),
    ('track_reading_time', '1', 'analytics'),
    ('track_scroll_position', '1', 'analytics'),
    ('retention_days', '365', 'analytics'),
    ('aggregate_older_data', '1', 'analytics'),

    -- Filtering settings
    ('enable_smart_suggestions', '1', 'filtering'),
    ('max_recent_filters', '10', 'filtering'),
    ('auto_save_filters', '1', 'filtering'),
    ('default_sort', 'added_desc', 'filtering'),
    ('enable_geographic_hierarchy', '1', 'filtering'),
    ('auto_migrate_country_filters', '1', 'filtering'),

    -- Sharing settings
    ('default_share_format', 'text', 'sharing'),
    ('include_summary', '1', 'sharing'),
    ('include_tags', '1', 'sharing'),
    ('include_source', '1', 'sharing'),
    ('copy_to_clipboard', '1', 'sharing'),

    -- AI settings
    ('ai_enabled', '1', 'ai'),
    ('ai_provider', 'ollama', 'ai'),
    ('ai_timeout_seconds', '120', 'ai'),
    ('ai_summary_enabled', '1', 'ai'),
    ('ai_summary_temperature', '0.1', 'ai'),
    ('ai_summary_max_tokens', '1000', 'ai'),
    ('ai_tagging_enabled', '1', 'ai'),
    ('ai_tagging_temperature', '0.3', 'ai'),
    ('ai_tagging_max_tokens', '200', 'ai'),
    ('max_tags_per_article', '10', 'ai'),
    ('min_confidence_threshold', '0.7', 'ai'),
    ('enable_geographic_tagging', '1', 'ai'),
    ('enable_category_tagging', '1', 'ai'),
    ('geographic_hierarchy_levels', '3', 'ai'),

    -- Scraping settings
    ('scraping_timeout_seconds', '30', 'scraping'),
    ('scraping_max_retries', '3', 'scraping'),
    ('max_content_length', '50000', 'scraping'),
    ('respect_robots_txt', '1', 'scraping'),
    ('rate_limit_delay_ms', '1000', 'scraping'),

    -- Processing settings
    ('batch_size', '10', 'processing'),
    ('max_concurrent', '5', 'processing'),
    ('retry_attempts', '3', 'processing'),
    ('priority_manual', '1', 'processing'),
    ('auto_mark_read_on_view', '0', 'processing');
@@ -0,0 +1,39 @@
-- Remove migrated data (this will remove all articles and tags created from migration)
-- WARNING: This will delete all migrated data

-- Remove legacy migration records
DELETE
FROM legacy_migration
WHERE old_filter_type IN ('country', 'category');

-- Remove article-tag associations for migrated data (non-AI generated)
DELETE
FROM article_tags
WHERE ai_generated = 0;

-- Remove migrated geographic tags (only those created from country data)
DELETE
FROM tags
WHERE tags.category = 'geographic'
  AND EXISTS (SELECT 1 FROM news WHERE news.country = tags.name);

-- Remove migrated content tags (only those created from category data)
DELETE
FROM tags
WHERE tags.category = 'content'
  AND EXISTS (SELECT 1 FROM news WHERE news.category = tags.name);

-- Remove migrated articles (only those that match news entries)
DELETE
FROM articles
WHERE EXISTS (SELECT 1
              FROM news
              WHERE news.url = articles.url
                AND news.title = articles.title
                AND articles.source_type = 'rss');

-- Reset tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
                   FROM article_tags
                   WHERE tag_id = tags.id);
@@ -0,0 +1,84 @@

-- Migrate data from old news table to new articles table
INSERT INTO articles (title, url, summary, published_at, added_at, source_type, processing_status)
SELECT title,
       url,
       summary,
       published,
       datetime(created_at, 'unixepoch'),
       'rss',
       CASE
           WHEN summary IS NOT NULL AND summary != '' THEN 'completed'
           ELSE 'pending'
       END
FROM news;

-- Create geographic tags from existing country data
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT country,
                'geographic',
                'Geographic location: ' || country,
                COUNT(*)
FROM news
WHERE country IS NOT NULL
  AND country != ''
GROUP BY country;

-- Link articles to their geographic tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
       t.id,
       0,   -- Not AI generated, migrated from legacy data
       1.0  -- Full confidence for existing data
FROM articles a
         JOIN news n ON a.url = n.url AND a.title = n.title
         JOIN tags t ON t.name = n.country AND t.category = 'geographic'
WHERE n.country IS NOT NULL
  AND n.country != '';

-- Create category tags if category column exists in news table
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT n.category,
                'content',
                'Content category: ' || n.category,
                COUNT(*)
FROM news n
WHERE n.category IS NOT NULL
  AND n.category != ''
GROUP BY n.category;

-- Link articles to their category tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
       t.id,
       0,   -- Not AI generated, migrated from legacy data
       1.0  -- Full confidence for existing data
FROM articles a
         JOIN news n ON a.url = n.url AND a.title = n.title
         JOIN tags t ON t.name = n.category AND t.category = 'content'
WHERE n.category IS NOT NULL
  AND n.category != '';

-- Record migration in legacy_migration table for countries
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'country',
       n.country,
       '[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT country FROM news WHERE country IS NOT NULL AND country != '') n
         JOIN tags t ON t.name = n.country AND t.category = 'geographic'
GROUP BY n.country;

-- Record migration in legacy_migration table for categories (if they exist)
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'category',
       n.category,
       '[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT category FROM news WHERE category IS NOT NULL AND category != '') n
         JOIN tags t ON t.name = n.category AND t.category = 'content'
GROUP BY n.category;

-- Update tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
                   FROM article_tags
                   WHERE tag_id = tags.id);
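One way to sanity-check this data migration afterwards is to compare row counts between the legacy news table and the new articles table. A hedged sketch from the Rust side (the function name is an illustrative assumption, and duplicate url/title pairs in news would legitimately skew the comparison):

use anyhow::{ensure, Result};
use sqlx::{Pool, Sqlite};

// Every legacy news row should have produced a matching rss article.
pub async fn verify_news_migration(pool: &Pool<Sqlite>) -> Result<()> {
    let news: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM news")
        .fetch_one(pool)
        .await?;
    let migrated: i64 = sqlx::query_scalar(
        "SELECT COUNT(*)
         FROM articles a
         JOIN news n ON n.url = a.url AND n.title = a.title
         WHERE a.source_type = 'rss'",
    )
    .fetch_one(pool)
    .await?;
    ensure!(news == migrated, "expected {news} migrated articles, found {migrated}");
    Ok(())
}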
@@ -0,0 +1,4 @@
-- Remove default sharing templates
DELETE
FROM share_templates
WHERE name IN ('Default Text', 'Markdown', 'Simple Text', 'HTML Email');
@@ -0,0 +1,39 @@
-- Insert default sharing templates
INSERT INTO share_templates (name, format, template_content, is_default)
VALUES ('Default Text', 'text', '📰 {title}

{summary}

🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}

Shared via Owly News Summariser', 1),

       ('Markdown', 'markdown', '# {title}

{summary}

**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}

---
*Shared via Owly News Summariser*', 1),

       ('Simple Text', 'text', '{title}

{summary}

Source: {url}', 0),

       ('HTML Email', 'html', '<h2>{title}</h2>
<p>{summary}</p>
<p><strong>Tags:</strong> {tags}<br>
<strong>Location:</strong> {geographic_tags}<br>
<strong>Source:</strong> <a href="{url}">{url}</a><br>
<strong>Published:</strong> {published_at}</p>
<hr>
<small>Shared via Owly News Summariser</small>', 0);
@@ -8,11 +8,11 @@ MIN_CRON_HOURS = float(os.getenv("MIN_CRON_HOURS", 0.5))
 DEFAULT_CRON_HOURS = float(os.getenv("CRON_HOURS", MIN_CRON_HOURS))
 CRON_HOURS = max(MIN_CRON_HOURS, DEFAULT_CRON_HOURS)
 SYNC_COOLDOWN_MINUTES = int(os.getenv("SYNC_COOLDOWN_MINUTES", 30))
-LLM_MODEL = os.getenv("LLM_MODEL", "mistral-nemo:12b")
+LLM_MODEL = os.getenv("LLM_MODEL", "gemma2:9b")
 LLM_TIMEOUT_SECONDS = int(os.getenv("LLM_TIMEOUT_SECONDS", 180))
 OLLAMA_API_TIMEOUT_SECONDS = int(os.getenv("OLLAMA_API_TIMEOUT_SECONDS", 10))
 ARTICLE_FETCH_TIMEOUT = int(os.getenv("ARTICLE_FETCH_TIMEOUT", 30))
-MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 10_000))
+MAX_ARTICLE_LENGTH = int(os.getenv("MAX_ARTICLE_LENGTH", 40_000))

 frontend_path = os.path.join(
     os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
@@ -21,7 +21,7 @@ frontend_path = os.path.join(
 )

 logging.basicConfig(
-    level=logging.WARNING,
+    level=logging.DEBUG,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
@@ -150,8 +150,6 @@ async def get_news(
             where_conditions.append("published BETWEEN ? AND ?")
             params.extend([from_ts, to_ts])

-            logger.info(f"Date range: {from_date} to {to_date} (UTC timestamps: {from_ts} to {to_ts})")
-
         # Build the complete SQL query
         base_sql = """
                    SELECT id, title, summary, url, published, country, created_at
@@ -163,27 +161,13 @@ async def get_news(
         else:
             sql = base_sql

-        sql += " ORDER BY published DESC LIMIT 1000"
+        sql += " ORDER BY published DESC"

-        # Log query info
-        if all_countries and all_dates:
-            logger.info("Querying ALL news articles (no filters)")
-        elif all_countries:
-            logger.info(f"Querying news from ALL countries with date filter")
-        elif all_dates:
-            logger.info(f"Querying ALL dates for countries: {country}")
-        else:
-            logger.info(f"Querying news: countries={country}, timezone={timezone_name}")
-
-        logger.info(f"SQL: {sql}")
-        logger.info(f"Parameters: {params}")
-
         # Execute the query
         db.execute(sql, params)
         rows = db.fetchall()
         result = [dict(row) for row in rows]

-        logger.info(f"Found {len(result)} news articles")
         return result

     except ValueError as e:
@@ -124,7 +124,6 @@ class NewsFetcher:

     @staticmethod
     def build_prompt(
-            url: str,
             title: str = "",
             summary: str = "",
             content: str = "") -> str:
@@ -132,14 +131,13 @@ class NewsFetcher:
         Generate a prompt for the LLM to summarize an article.

         Args:
-            url: Public URL of the article to summarize
             title: Article title from RSS feed (optional)
             summary: Article summary from RSS feed (optional)
             content: Extracted article content (optional)

         Returns:
             A formatted prompt string that instructs the LLM to generate
-            a JSON response with title and summaries in German and English
+            a JSON response with title, summary and tags in German
         """
         context_info = []
         if title:
@@ -155,21 +153,36 @@ class NewsFetcher:
             context_info) if context_info else "Keine zusätzlichen Informationen verfügbar."

         return (
-            "### Aufgabe\n"
-            f"Du sollst eine Nachricht basierend auf der URL und den verfügbaren Informationen zusammenfassen.\n"
-            f"URL: {url}\n"
-            f"Verfügbare Informationen:\n{context}\n\n"
+            "### Vorliegende Informationen\n"
+            f"{context}\n\n"
+            "### Längenbegrenzungen\n"
+            "title: Format \"ORT: Titel\", max 100 Zeichen\n"
+            "location: nur der ORT-Teil, max 40 Zeichen\n"
+            "summary: 100–160 Wörter\n"
+            "tags: bis zu 6 Schlüsselwörter, durch Komma getrennt, alles Kleinbuchstaben.\n\n"
             "### Regeln\n"
-            "1. Nutze VORRANGIG den Artikel-Inhalt falls verfügbar, ergänze mit RSS-Informationen\n"
-            "2. Falls kein Artikel-Inhalt verfügbar ist, nutze RSS-Titel und -Beschreibung\n"
-            "3. Falls keine ausreichenden Informationen vorliegen, erstelle eine plausible Zusammenfassung basierend auf der URL\n"
-            "4. Gib ausschließlich **gültiges minifiziertes JSON** zurück – kein Markdown, keine Kommentare\n"
-            "5. Struktur: {\"title\":\"…\",\"summary\":\"…\"}\n"
-            "6. title: Aussagekräftiger deutscher Titel (max 100 Zeichen)\n"
-            "7. summary: Deutsche Zusammenfassung (zwischen 100 und 160 Wörter)\n"
-            "8. Kein Text vor oder nach dem JSON\n\n"
+            "1. Nutze ausschließlich Informationen, die im bereitgestellten Material eindeutig vorkommen. Externes Wissen ist untersagt.\n"
+            "2. Liegt sowohl Artikel-Text als auch RSS-Metadaten vor, hat der Artikel-Text Vorrang; verwende RSS nur ergänzend.\n"
+            "3. Liegt nur RSS-Titel und/oder -Beschreibung vor, stütze dich ausschließlich darauf.\n"
+            "4. Sind die Informationen unzureichend, gib exakt {\"location\":\"\",\"title\":\"\",\"summary\":\"\",\"tags\":\"\"} zurück.\n"
+            "5. Gib nur gültiges, minifiziertes JSON zurück – keine Zeilenumbrüche, kein Markdown, keine Kommentare.\n"
+            "6. Verwende keine hypothetischen Formulierungen (\"könnte\", \"möglicherweise\" etc.).\n"
+            "7. Wörtliche Zitate dürfen höchstens 15 % des Summary-Texts ausmachen.\n"
+            "8. Kein Text vor oder nach dem JSON.\n\n"
             "### Ausgabe\n"
-            "Jetzt antworte mit dem JSON:"
+            "Antworte jetzt ausschließlich mit dem JSON:\n"
         )

+    @staticmethod
+    def build_system_prompt():
+        return (
+            "Du bist ein hochpräziser JSON-Summarizer und Experte für die Zusammenfassung von Artikeln.\n\n"
+            "### Vorgehen\n"
+            "Schritt 1: Identifiziere Hauptthema und Zweck.\n"
+            "Schritt 2: Extrahiere die wichtigsten Fakten und Ergebnisse.\n"
+            "Schritt 3: Erkenne die zentralen Argumente und Standpunkte.\n"
+            "Schritt 4: Ordne die Informationen nach Wichtigkeit.\n"
+            "Schritt 5: Erstelle eine prägnante, klare und sachliche Zusammenfassung.\n\n"
+        )
+
     @staticmethod
@@ -193,26 +206,55 @@ class NewsFetcher:
         A dictionary containing the article title and summaries in German and English,
         or None if summarization failed
         """
+        logger.debug("[AI] Fetching article content from: " + url)

         article_content = await NewsFetcher.fetch_article_content(client, url)

         if not article_content:
             logger.warning(
                 f"⚠️ Could not fetch article content, using RSS data only")

-        prompt = NewsFetcher.build_prompt(
-            url, title, summary, article_content)
+        prompt = NewsFetcher.build_prompt(title, summary, article_content)
+        system_prompt = NewsFetcher.build_system_prompt()
         payload = {
             "model": LLM_MODEL,
             "prompt": prompt,
+            "system": system_prompt,
             "stream": False,
             "temperature": 0.1,
-            "format": "json",
+            "format": {
+                "type": "object",
+                "properties": {
+                    "title": {
+                        "type": "string"
+                    },
+                    "location": {
+                        "type": "string"
+                    },
+                    "summary": {
+                        "type": "string"
+                    },
+                    "tags": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "required": [
+                    "title",
+                    "summary",
+                    "tags"
+                ]
+            },
             "options": {
                 "num_gpu": 1,  # Force GPU usage
-                "num_ctx": 128_000,  # Context size
+                "num_ctx": 8192,  # Context size
             }
         }

+        logger.debug("[AI] Running summary generation...")
+
         try:
             response = await client.post(
                 f"{OLLAMA_HOST}/api/generate",
@@ -224,6 +266,8 @@ class NewsFetcher:
             result = response.json()
             llm_response = result["response"]

+            logger.debug("[AI] " + llm_response)
+
             if isinstance(llm_response, str):
                 summary_data = json.loads(llm_response)
             else:
@@ -388,8 +432,6 @@ class NewsFetcher:
                     summary=rss_summary
                 )

-                logger.info(summary)
-
                 if not summary:
                     logger.warning(
                         f"❌ Failed to get summary for article {i}: {article_url}")
@@ -403,7 +445,8 @@ class NewsFetcher:
                 cursor.execute(
                     """
                     INSERT
-                    OR IGNORE INTO news
+                    OR IGNORE
+                    INTO news
                         (title, summary, url, published, country)
                     VALUES (?, ?, ?, ?, ?)
                     """,
@@ -11,10 +11,12 @@ MIN_CRON_HOURS=0.5
 SYNC_COOLDOWN_MINUTES=30

 # LLM model to use for summarization
-LLM_MODEL=qwen2:7b-instruct-q4_K_M
-LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0
-LLM_MODEL=mistral-nemo:12b
-LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S
+LLM_MODEL=qwen2:7b-instruct-q4_K_M # ca 7-9GB (typisch 8GB)
+LLM_MODEL=phi3:3.8b-mini-128k-instruct-q4_0 # ca 6-8GB (langer kontext)
+LLM_MODEL=mistral-nemo:12b # ca 16-24+GB
+LLM_MODEL=cnjack/mistral-samll-3.1:24b-it-q4_K_S # ca 22GB
+LLM_MODEL=yarn-mistral:7b-64k-q4_K_M # ca 11GB
+LLM_MODEL=gemma2:9b # ca 8GB

 # Timeout in seconds for LLM requests
 LLM_TIMEOUT_SECONDS=180
Binary file not shown.
@@ -1,8 +0,0 @@
-import { defineConfig } from 'cypress'
-
-export default defineConfig({
-  e2e: {
-    specPattern: 'cypress/e2e/**/*.{cy,spec}.{js,jsx,ts,tsx}',
-    baseUrl: 'http://localhost:4173',
-  },
-})
@@ -1,5 +1,5 @@
 {
-  "name": "owly-news-summariser",
+  "name": "owly-news",
   "version": "0.0.1",
   "private": true,
   "license": "PolyForm-Noncommercial-1.0.0",
@@ -14,9 +14,10 @@

       <!-- Articles Grid -->
       <div v-else class="grid gap-4 sm:gap-6 md:grid-cols-2 xl:grid-cols-3">
+        <template v-for="article in news.articles"
+                  :key="article.id">
           <article
-            v-for="article in news.articles"
-            :key="article.id"
+            v-if="isValidArticleContent(article)"
             class="flex flex-col h-full bg-white dark:bg-gray-800 rounded-lg shadow-sm border border-gray-200 dark:border-gray-700 hover:shadow-md dark:hover:shadow-lg dark:hover:shadow-gray-800/50 transition-all duration-200 overflow-hidden group"
           >
             <!-- Article Header -->
@@ -28,7 +29,7 @@
               </span>
               <time
                 :datetime="new Date(article.published * 1000).toISOString()"
-                :title="new Date(article.published * 1000).toLocaleString(userLocale.value, {
+                :title="new Date(article.published * 1000).toLocaleString(userLocale, {
                   dateStyle: 'full',
                   timeStyle: 'long'
                 })"
@@ -83,6 +84,7 @@
             </a>
           </div>
         </article>
+        </template>
       </div>

       <!-- Loading State & Load More Trigger -->
@@ -129,6 +131,37 @@ const loadMoreArticles = async () => {
   }
 };

+interface Article {
+  id: number;
+  title: string;
+  summary: string;
+  url: string;
+  published: number;
+  country: string;
+  created_at: number;
+}
+
+const INVALID_MARKERS = ['---', '...', '…', 'Title', 'Summary', 'Titel', 'Zusammenfassung'] as const;
+const REQUIRED_TEXT_FIELDS = ['title', 'summary', 'url'] as const;
+
+const isValidArticleContent = (article: Article): boolean => {
+  const hasEmptyRequiredFields = REQUIRED_TEXT_FIELDS.some(
+    field => article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].length === 0
+  );
+
+  if (hasEmptyRequiredFields) {
+    return false;
+  }
+
+  const hasInvalidMarkers = REQUIRED_TEXT_FIELDS.some(field =>
+    INVALID_MARKERS.some(marker =>
+      article[field as keyof Pick<Article, typeof REQUIRED_TEXT_FIELDS[number]>].includes(marker)
+    )
+  );
+
+  return !hasInvalidMarkers;
+};
+
 const observer = ref<IntersectionObserver | null>(null);
 const loadMoreTrigger = ref<HTMLElement | null>(null);

@@ -6470,9 +6470,9 @@ __metadata:
   languageName: node
   linkType: hard

-"owly-news-summariser@workspace:.":
+"owly-news@workspace:.":
   version: 0.0.0-use.local
-  resolution: "owly-news-summariser@workspace:."
+  resolution: "owly-news@workspace:."
   dependencies:
     "@tailwindcss/vite": "npm:^4.1.11"
     "@tsconfig/node22": "npm:^22.0.2"