[update] added migration scripts for migrating news data to articles, geographic and category tagging, and default sharing templates

This commit is contained in:
2025-08-06 16:39:49 +02:00
parent 0aa8d9fa3a
commit c3b0c87bfa
12 changed files with 393 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
-- Drop articles table and its indexes
DROP INDEX IF EXISTS idx_articles_read_at;
DROP INDEX IF EXISTS idx_articles_source_type;
DROP INDEX IF EXISTS idx_articles_processing_status;
DROP INDEX IF EXISTS idx_articles_added_at;
DROP INDEX IF EXISTS idx_articles_published_at;
DROP TABLE IF EXISTS articles;

View File

@@ -0,0 +1,27 @@
-- Create enhanced articles table to replace news table structure
CREATE TABLE IF NOT EXISTS articles
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
url TEXT NOT NULL,
source_type TEXT NOT NULL DEFAULT 'rss', -- 'rss', 'manual'
rss_content TEXT, -- RSS description/excerpt
full_content TEXT, -- Scraped full content
summary TEXT, -- AI-generated summary
processing_status TEXT NOT NULL DEFAULT 'pending', -- 'pending', 'processing', 'completed', 'failed'
published_at TIMESTAMP NOT NULL,
added_at TIMESTAMP NOT NULL DEFAULT (datetime('now')),
read_at TIMESTAMP,
read_count INTEGER NOT NULL DEFAULT 0,
reading_time INTEGER, -- in seconds
ai_enabled BOOLEAN NOT NULL DEFAULT 1,
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')),
updated_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create indexes for performance
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles (published_at);
CREATE INDEX IF NOT EXISTS idx_articles_added_at ON articles (added_at);
CREATE INDEX IF NOT EXISTS idx_articles_processing_status ON articles (processing_status);
CREATE INDEX IF NOT EXISTS idx_articles_source_type ON articles (source_type);
CREATE INDEX IF NOT EXISTS idx_articles_read_at ON articles (read_at);

View File

@@ -0,0 +1,9 @@
-- Drop tag system tables and indexes
DROP INDEX IF EXISTS idx_article_tags_ai_generated;
DROP INDEX IF EXISTS idx_article_tags_tag_id;
DROP INDEX IF EXISTS idx_article_tags_article_id;
DROP INDEX IF EXISTS idx_tags_usage_count;
DROP INDEX IF EXISTS idx_tags_parent_id;
DROP INDEX IF EXISTS idx_tags_category;
DROP TABLE IF EXISTS article_tags;
DROP TABLE IF EXISTS tags;

View File

@@ -0,0 +1,31 @@
-- Create tags table with hierarchical support
CREATE TABLE IF NOT EXISTS tags
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
category TEXT NOT NULL, -- 'geographic', 'content', 'source', 'custom'
description TEXT,
color TEXT, -- Hex color for UI display
usage_count INTEGER NOT NULL DEFAULT 0,
parent_id INTEGER REFERENCES tags (id), -- For hierarchical tags (e.g., Country -> Region -> City)
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create article_tags junction table
CREATE TABLE IF NOT EXISTS article_tags
(
article_id INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
tag_id INTEGER NOT NULL REFERENCES tags (id) ON DELETE CASCADE,
confidence_score REAL DEFAULT 1.0, -- AI confidence (0.0-1.0)
ai_generated BOOLEAN NOT NULL DEFAULT 0,
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')),
PRIMARY KEY (article_id, tag_id)
);
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_tags_category ON tags (category);
CREATE INDEX IF NOT EXISTS idx_tags_parent_id ON tags (parent_id);
CREATE INDEX IF NOT EXISTS idx_tags_usage_count ON tags (usage_count DESC);
CREATE INDEX IF NOT EXISTS idx_article_tags_article_id ON article_tags (article_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_tag_id ON article_tags (tag_id);
CREATE INDEX IF NOT EXISTS idx_article_tags_ai_generated ON article_tags (ai_generated);

View File

@@ -0,0 +1,11 @@
-- Drop analytics system tables and indexes
DROP INDEX IF EXISTS idx_legacy_migration_old_filter_type;
DROP INDEX IF EXISTS idx_share_templates_format;
DROP INDEX IF EXISTS idx_filter_presets_user_id;
DROP INDEX IF EXISTS idx_reading_stats_read_at;
DROP INDEX IF EXISTS idx_reading_stats_article_id;
DROP INDEX IF EXISTS idx_reading_stats_user_id;
DROP TABLE IF EXISTS legacy_migration;
DROP TABLE IF EXISTS share_templates;
DROP TABLE IF EXISTS filter_presets;
DROP TABLE IF EXISTS reading_stats;

View File

@@ -0,0 +1,50 @@
-- Create reading statistics table
CREATE TABLE IF NOT EXISTS reading_stats
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER DEFAULT 1, -- For future multi-user support
article_id INTEGER NOT NULL REFERENCES articles (id) ON DELETE CASCADE,
read_at TIMESTAMP NOT NULL,
reading_time INTEGER, -- in seconds
completion_rate REAL DEFAULT 1.0, -- 0.0-1.0, how much of the article was read
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create filter presets table
CREATE TABLE IF NOT EXISTS filter_presets
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
filter_criteria TEXT NOT NULL, -- JSON string of filter parameters
user_id INTEGER DEFAULT 1, -- For future multi-user support
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create share templates table
CREATE TABLE IF NOT EXISTS share_templates
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
format TEXT NOT NULL, -- 'text', 'markdown', 'html', 'json'
template_content TEXT NOT NULL,
is_default BOOLEAN NOT NULL DEFAULT 0,
created_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create legacy migration tracking table
CREATE TABLE IF NOT EXISTS legacy_migration
(
id INTEGER PRIMARY KEY AUTOINCREMENT,
old_filter_type TEXT NOT NULL, -- 'country', 'category', etc.
old_value TEXT NOT NULL,
new_tag_ids TEXT, -- JSON array of tag IDs
migrated_at TIMESTAMP NOT NULL DEFAULT (datetime('now'))
);
-- Create indexes
CREATE INDEX IF NOT EXISTS idx_reading_stats_user_id ON reading_stats (user_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_article_id ON reading_stats (article_id);
CREATE INDEX IF NOT EXISTS idx_reading_stats_read_at ON reading_stats (read_at);
CREATE INDEX IF NOT EXISTS idx_filter_presets_user_id ON filter_presets (user_id);
CREATE INDEX IF NOT EXISTS idx_share_templates_format ON share_templates (format);
CREATE INDEX IF NOT EXISTS idx_legacy_migration_old_filter_type ON legacy_migration (old_filter_type);

View File

@@ -0,0 +1,18 @@
-- Remove enhanced settings columns and indexes
DROP INDEX IF EXISTS idx_settings_user_id;
DROP INDEX IF EXISTS idx_settings_category;
-- Note: SQLite doesn't support DROP COLUMN, so we recreate the table
CREATE TABLE settings_backup AS
SELECT key, val
FROM settings;
DROP TABLE settings;
CREATE TABLE settings
(
key TEXT PRIMARY KEY,
val TEXT NOT NULL
);
INSERT INTO settings
SELECT key, val
FROM settings_backup;
DROP TABLE settings_backup;

View File

@@ -0,0 +1,74 @@
-- Enhance settings table to support more structured configuration
ALTER TABLE settings
ADD COLUMN category TEXT DEFAULT 'general';
ALTER TABLE settings
ADD COLUMN user_id INTEGER DEFAULT 1;
ALTER TABLE settings
ADD COLUMN updated_at TIMESTAMP DEFAULT (datetime('now'));
-- Create index for better performance
CREATE INDEX IF NOT EXISTS idx_settings_category ON settings (category);
CREATE INDEX IF NOT EXISTS idx_settings_user_id ON settings (user_id);
-- Insert default settings based on roadmap configuration
INSERT OR IGNORE INTO settings (key, val, category)
VALUES
-- Display settings
('default_view', 'compact', 'display'),
('articles_per_page', '50', 'display'),
('show_reading_time', '1', 'display'),
('show_word_count', '0', 'display'),
('highlight_unread', '1', 'display'),
('theme', 'auto', 'display'),
-- Analytics settings
('analytics_enabled', '1', 'analytics'),
('track_reading_time', '1', 'analytics'),
('track_scroll_position', '1', 'analytics'),
('retention_days', '365', 'analytics'),
('aggregate_older_data', '1', 'analytics'),
-- Filtering settings
('enable_smart_suggestions', '1', 'filtering'),
('max_recent_filters', '10', 'filtering'),
('auto_save_filters', '1', 'filtering'),
('default_sort', 'added_desc', 'filtering'),
('enable_geographic_hierarchy', '1', 'filtering'),
('auto_migrate_country_filters', '1', 'filtering'),
-- Sharing settings
('default_share_format', 'text', 'sharing'),
('include_summary', '1', 'sharing'),
('include_tags', '1', 'sharing'),
('include_source', '1', 'sharing'),
('copy_to_clipboard', '1', 'sharing'),
-- AI settings
('ai_enabled', '1', 'ai'),
('ai_provider', 'ollama', 'ai'),
('ai_timeout_seconds', '120', 'ai'),
('ai_summary_enabled', '1', 'ai'),
('ai_summary_temperature', '0.1', 'ai'),
('ai_summary_max_tokens', '1000', 'ai'),
('ai_tagging_enabled', '1', 'ai'),
('ai_tagging_temperature', '0.3', 'ai'),
('ai_tagging_max_tokens', '200', 'ai'),
('max_tags_per_article', '10', 'ai'),
('min_confidence_threshold', '0.7', 'ai'),
('enable_geographic_tagging', '1', 'ai'),
('enable_category_tagging', '1', 'ai'),
('geographic_hierarchy_levels', '3', 'ai'),
-- Scraping settings
('scraping_timeout_seconds', '30', 'scraping'),
('scraping_max_retries', '3', 'scraping'),
('max_content_length', '50000', 'scraping'),
('respect_robots_txt', '1', 'scraping'),
('rate_limit_delay_ms', '1000', 'scraping'),
-- Processing settings
('batch_size', '10', 'processing'),
('max_concurrent', '5', 'processing'),
('retry_attempts', '3', 'processing'),
('priority_manual', '1', 'processing'),
('auto_mark_read_on_view', '0', 'processing');

View File

@@ -0,0 +1,39 @@
-- Remove migrated data (this will remove all articles and tags created from migration)
-- WARNING: This will delete all migrated data
-- Remove legacy migration records
DELETE
FROM legacy_migration
WHERE old_filter_type IN ('country', 'category');
-- Remove article-tag associations for migrated data (non-AI generated)
DELETE
FROM article_tags
WHERE ai_generated = 0;
-- Remove migrated geographic tags (only those created from country data)
DELETE
FROM tags
WHERE tags.category = 'geographic'
AND EXISTS (SELECT 1 FROM news WHERE news.country = tags.name);
-- Remove migrated content tags (only those created from category data)
DELETE
FROM tags
WHERE tags.category = 'content'
AND EXISTS (SELECT 1 FROM news WHERE news.category = tags.name);
-- Remove migrated articles (only those that match news entries)
DELETE
FROM articles
WHERE EXISTS (SELECT 1
FROM news
WHERE news.url = articles.url
AND news.title = articles.title
AND articles.source_type = 'rss');
-- Reset tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
FROM article_tags
WHERE tag_id = tags.id);

View File

@@ -0,0 +1,84 @@
-- Migrate data from old news table to new articles table
INSERT INTO articles (title, url, summary, published_at, added_at, source_type, processing_status)
SELECT title,
url,
summary,
published,
datetime(created_at, 'unixepoch'),
'rss',
CASE
WHEN summary IS NOT NULL AND summary != '' THEN 'completed'
ELSE 'pending'
END
FROM news;
-- Create geographic tags from existing country data
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT country,
'geographic',
'Geographic location: ' || country,
COUNT(*)
FROM news
WHERE country IS NOT NULL
AND country != ''
GROUP BY country;
-- Link articles to their geographic tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
t.id,
0, -- Not AI generated, migrated from legacy data
1.0 -- Full confidence for existing data
FROM articles a
JOIN news n ON a.url = n.url AND a.title = n.title
JOIN tags t ON t.name = n.country AND t.category = 'geographic'
WHERE n.country IS NOT NULL
AND n.country != '';
-- Create category tags if category column exists in news table
INSERT OR IGNORE INTO tags (name, category, description, usage_count)
SELECT DISTINCT n.category,
'content',
'Content category: ' || n.category,
COUNT(*)
FROM news n
WHERE n.category IS NOT NULL
AND n.category != ''
GROUP BY n.category;
-- Link articles to their category tags
INSERT OR IGNORE INTO article_tags (article_id, tag_id, ai_generated, confidence_score)
SELECT a.id,
t.id,
0, -- Not AI generated, migrated from legacy data
1.0 -- Full confidence for existing data
FROM articles a
JOIN news n ON a.url = n.url AND a.title = n.title
JOIN tags t ON t.name = n.category AND t.category = 'content'
WHERE n.category IS NOT NULL
AND n.category != '';
-- Record migration in legacy_migration table for countries
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'country',
n.country,
'[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT country FROM news WHERE country IS NOT NULL AND country != '') n
JOIN tags t ON t.name = n.country AND t.category = 'geographic'
GROUP BY n.country;
-- Record migration in legacy_migration table for categories (if they exist)
INSERT INTO legacy_migration (old_filter_type, old_value, new_tag_ids)
SELECT 'category',
n.category,
'[' || GROUP_CONCAT(t.id) || ']'
FROM (SELECT DISTINCT category FROM news WHERE category IS NOT NULL AND category != '') n
JOIN tags t ON t.name = n.category AND t.category = 'content'
GROUP BY n.category;
-- Update tag usage counts
UPDATE tags
SET usage_count = (SELECT COUNT(*)
FROM article_tags
WHERE tag_id = tags.id);

View File

@@ -0,0 +1,4 @@
-- Remove default sharing templates
DELETE
FROM share_templates
WHERE name IN ('Default Text', 'Markdown', 'Simple Text', 'HTML Email');

View File

@@ -0,0 +1,39 @@
-- Insert default sharing templates
INSERT INTO share_templates (name, format, template_content, is_default)
VALUES ('Default Text', 'text', '📰 {title}
{summary}
🏷️ Tags: {tags}
🌍 Location: {geographic_tags}
🔗 Source: {url}
📅 Published: {published_at}
Shared via Owly News Summariser', 1),
('Markdown', 'markdown', '# {title}
{summary}
**Tags:** {tags}
**Location:** {geographic_tags}
**Source:** [{url}]({url})
**Published:** {published_at}
---
*Shared via Owly News Summariser*', 1),
('Simple Text', 'text', '{title}
{summary}
Source: {url}', 0),
('HTML Email', 'html', '<h2>{title}</h2>
<p>{summary}</p>
<p><strong>Tags:</strong> {tags}<br>
<strong>Location:</strong> {geographic_tags}<br>
<strong>Source:</strong> <a href="{url}">{url}</a><br>
<strong>Published:</strong> {published_at}</p>
<hr>
<small>Shared via Owly News Summariser</small>', 0);