diff --git a/backend-rust/src/config.rs b/backend-rust/src/config.rs index 9f1ff6b..2684b30 100644 --- a/backend-rust/src/config.rs +++ b/backend-rust/src/config.rs @@ -5,18 +5,58 @@ use std::path::PathBuf; #[derive(Deserialize, Serialize, Debug, Clone)] #[serde(default)] pub struct ConfigFile { + #[serde(default)] pub server: Server, + #[serde(default)] pub display: ConfDisplay, + #[serde(default)] pub analytics: Analytics, + #[serde(default)] pub filtering: Filtering, + #[serde(default)] pub sharing: Sharing, + #[serde(default)] pub ai: Ai, + #[serde(default)] pub scraping: Scraping, + #[serde(default)] pub processing: Processing, + #[serde(default)] pub migration: ConfMigration, + #[serde(default)] pub cli: Cli, } +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] +pub struct ContentQuality { + pub min_content_length: u32, + pub max_content_length: u32, + pub min_text_html_ratio: f32, + pub readability_threshold: f32, +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] +pub struct DuplicateDetection { + pub enabled: bool, + pub title_similarity_threshold: f32, + pub content_similarity_threshold: f32, + pub check_historical_days: u32, + pub store_fingerprints: bool, +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] +pub struct AdPrevention { + pub enabled: bool, + pub block_iframes: bool, + pub clean_content: bool, + pub ad_patterns: Vec<String>, + pub preserved_elements: Vec<String>, + pub removed_elements: Vec<String>, +} + #[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub enum DefaultView { Compact, @@ -74,122 +114,457 @@ pub enum DefaultOutput { } #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct Server { + #[serde(default = "Server::default_host")] pub host: String, + #[serde(default = "Server::default_port")] pub port: u16, } +impl Server { + fn default_host() -> String { + Server::default().host + } + fn default_port() -> u16 { + Server::default().port + } +} 
#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct ConfDisplay { + #[serde(default)] pub default_view: DefaultView, + #[serde(default = "ConfDisplay::default_articles_per_page")] pub articles_per_page: u32, + #[serde(default = "ConfDisplay::default_show_reading_time")] pub show_reading_time: bool, + #[serde(default = "ConfDisplay::default_show_word_count")] pub show_word_count: bool, + #[serde(default = "ConfDisplay::default_highlight_unread")] pub highlight_unread: bool, + #[serde(default)] pub theme: Theme, } +impl ConfDisplay { + fn default_articles_per_page() -> u32 { + ConfDisplay::default().articles_per_page + } + fn default_show_reading_time() -> bool { + ConfDisplay::default().show_reading_time + } + fn default_show_word_count() -> bool { + ConfDisplay::default().show_word_count + } + fn default_highlight_unread() -> bool { + ConfDisplay::default().highlight_unread + } +} #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct Analytics { + #[serde(default = "Analytics::default_enabled")] pub enabled: bool, + #[serde(default = "Analytics::default_track_reading_time")] pub track_reading_time: bool, + #[serde(default = "Analytics::default_track_scroll_position")] pub track_scroll_position: bool, + #[serde(default = "Analytics::default_retention_days")] pub retention_days: u32, + #[serde(default = "Analytics::default_aggregate_older_data")] pub aggregate_older_data: bool, } +impl Analytics { + fn default_enabled() -> bool { + Analytics::default().enabled + } + fn default_track_reading_time() -> bool { + Analytics::default().track_reading_time + } + fn default_track_scroll_position() -> bool { + Analytics::default().track_scroll_position + } + fn default_retention_days() -> u32 { + Analytics::default().retention_days + } + fn default_aggregate_older_data() -> bool { + Analytics::default().aggregate_older_data + } +} #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct Filtering { + 
#[serde(default = "Filtering::default_enable_smart_suggestions")] pub enable_smart_suggestions: bool, + #[serde(default = "Filtering::default_max_recent_filters")] pub max_recent_filters: u32, + #[serde(default = "Filtering::default_auto_save_filters")] pub auto_save_filters: bool, + #[serde(default)] pub default_sort: DefaultSort, + #[serde(default = "Filtering::default_enable_geographic_hierarchy")] pub enable_geographic_hierarchy: bool, + #[serde(default = "Filtering::default_auto_migrate_country_filters")] pub auto_migrate_country_filters: bool, } - -#[derive(Deserialize, Serialize, Debug, Clone)] -pub struct Sharing { - pub default_format: DefaultFormat, - pub include_summary: bool, - pub include_tags: bool, - pub include_source: bool, - pub copy_to_clipboard: bool, - pub templates: SharingTemplates, +impl Filtering { + fn default_enable_smart_suggestions() -> bool { + Filtering::default().enable_smart_suggestions + } + fn default_max_recent_filters() -> u32 { + Filtering::default().max_recent_filters + } + fn default_auto_save_filters() -> bool { + Filtering::default().auto_save_filters + } + fn default_enable_geographic_hierarchy() -> bool { + Filtering::default().enable_geographic_hierarchy + } + fn default_auto_migrate_country_filters() -> bool { + Filtering::default().auto_migrate_country_filters + } } #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] +pub struct Sharing { + #[serde(default)] + pub default_format: DefaultFormat, + #[serde(default = "Sharing::default_include_summary")] + pub include_summary: bool, + #[serde(default = "Sharing::default_include_tags")] + pub include_tags: bool, + #[serde(default = "Sharing::default_include_source")] + pub include_source: bool, + #[serde(default = "Sharing::default_copy_to_clipboard")] + pub copy_to_clipboard: bool, + #[serde(default)] + pub templates: SharingTemplates, +} +impl Sharing { + fn default_include_summary() -> bool { + Sharing::default().include_summary + } + fn 
default_include_tags() -> bool { + Sharing::default().include_tags + } + fn default_include_source() -> bool { + Sharing::default().include_source + } + fn default_copy_to_clipboard() -> bool { + Sharing::default().copy_to_clipboard + } +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct SharingTemplates { + #[serde(default = "SharingTemplate::default_text")] pub text: SharingTemplate, + #[serde(default = "SharingTemplate::default_markdown")] pub markdown: SharingTemplate, } #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct SharingTemplate { pub format: String, } +impl Default for SharingTemplate { + fn default() -> Self { + // Fallback empty only if used generically; specific fields use the richer defaults below. + Self { + format: String::new(), + } + } +} +impl SharingTemplate { + pub fn default_text() -> Self { + SharingTemplates::default().text + } + pub fn default_markdown() -> Self { + SharingTemplates::default().markdown + } +} #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct Ai { + #[serde(default = "Ai::default_enabled")] pub enabled: bool, + #[serde(default)] pub provider: AiProvider, + #[serde(default = "Ai::default_timeout_seconds")] pub timeout_seconds: u32, + #[serde(default)] pub summary: AiSummary, + #[serde(default)] pub tagging: AiTagging, } - -#[derive(Deserialize, Serialize, Debug, Clone)] -pub struct AiSummary { - pub enabled: bool, - pub temperature: f32, - pub max_tokens: u32, +impl Ai { + fn default_enabled() -> bool { + Ai::default().enabled + } + fn default_timeout_seconds() -> u32 { + Ai::default().timeout_seconds + } } #[derive(Deserialize, Serialize, Debug, Clone)] -pub struct AiTagging { +#[serde(default)] +pub struct AiSummary { + #[serde(default = "AiSummary::default_enabled")] pub enabled: bool, + #[serde(default = "AiSummary::default_temperature")] pub temperature: f32, + #[serde(default = "AiSummary::default_max_tokens")] pub max_tokens: u32, 
+} +impl AiSummary { + fn default_enabled() -> bool { + AiSummary::default().enabled + } + fn default_temperature() -> f32 { + AiSummary::default().temperature + } + fn default_max_tokens() -> u32 { + AiSummary::default().max_tokens + } +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] +pub struct AiTagging { + #[serde(default = "AiTagging::default_enabled")] + pub enabled: bool, + #[serde(default = "AiTagging::default_temperature")] + pub temperature: f32, + #[serde(default = "AiTagging::default_max_tokens")] + pub max_tokens: u32, + #[serde(default = "AiTagging::default_max_tags_per_article")] pub max_tags_per_article: u32, + #[serde(default = "AiTagging::default_min_confidence_threshold")] pub min_confidence_threshold: f32, + #[serde(default = "AiTagging::default_enable_geographic_tagging")] pub enable_geographic_tagging: bool, + #[serde(default = "AiTagging::default_enable_category_tagging")] pub enable_category_tagging: bool, + #[serde(default = "AiTagging::default_geographic_hierarchy_levels")] pub geographic_hierarchy_levels: u32, } - -#[derive(Deserialize, Serialize, Debug, Clone)] -pub struct Scraping { - pub timeout_seconds: u32, - pub max_retries: u32, - pub max_content_length: u32, - pub respect_robots_txt: bool, - pub rate_limit_delay_ms: u32, +impl AiTagging { + fn default_enabled() -> bool { + AiTagging::default().enabled + } + fn default_temperature() -> f32 { + AiTagging::default().temperature + } + fn default_max_tokens() -> u32 { + AiTagging::default().max_tokens + } + fn default_max_tags_per_article() -> u32 { + AiTagging::default().max_tags_per_article + } + fn default_min_confidence_threshold() -> f32 { + AiTagging::default().min_confidence_threshold + } + fn default_enable_geographic_tagging() -> bool { + AiTagging::default().enable_geographic_tagging + } + fn default_enable_category_tagging() -> bool { + AiTagging::default().enable_category_tagging + } + fn default_geographic_hierarchy_levels() -> u32 { + 
AiTagging::default().geographic_hierarchy_levels + } } #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] +pub struct Scraping { + #[serde(default = "Scraping::default_timeout_seconds")] + pub timeout_seconds: u32, + #[serde(default = "Scraping::default_max_retries")] + pub max_retries: u32, + #[serde(default = "Scraping::default_max_content_length")] + pub max_content_length: u32, + #[serde(default = "Scraping::default_respect_robots_txt")] + pub respect_robots_txt: bool, + #[serde(default = "Scraping::default_rate_limit_delay_ms")] + pub rate_limit_delay_ms: u32, + #[serde(default)] + pub content_quality: ContentQuality, + #[serde(default)] + pub duplicate_detection: DuplicateDetection, + #[serde(default)] + pub ad_prevention: AdPrevention, +} +impl Scraping { + fn default_timeout_seconds() -> u32 { + Scraping::default().timeout_seconds + } + fn default_max_retries() -> u32 { + Scraping::default().max_retries + } + fn default_max_content_length() -> u32 { + Scraping::default().max_content_length + } + fn default_respect_robots_txt() -> bool { + Scraping::default().respect_robots_txt + } + fn default_rate_limit_delay_ms() -> u32 { + Scraping::default().rate_limit_delay_ms + } +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct Processing { + #[serde(default = "Processing::default_batch_size")] pub batch_size: u32, + #[serde(default = "Processing::default_max_concurrent")] pub max_concurrent: u32, + #[serde(default = "Processing::default_retry_attempts")] pub retry_attempts: u32, + #[serde(default = "Processing::default_priority_manual")] pub priority_manual: bool, + #[serde(default = "Processing::default_auto_mark_read_on_view")] pub auto_mark_read_on_view: bool, } - -#[derive(Deserialize, Serialize, Debug, Clone)] -pub struct ConfMigration { - pub auto_convert_country_filters: bool, - pub preserve_legacy_data: bool, - pub migration_batch_size: u32, +impl Processing { + fn default_batch_size() -> u32 { + 
Processing::default().batch_size + } + fn default_max_concurrent() -> u32 { + Processing::default().max_concurrent + } + fn default_retry_attempts() -> u32 { + Processing::default().retry_attempts + } + fn default_priority_manual() -> bool { + Processing::default().priority_manual + } + fn default_auto_mark_read_on_view() -> bool { + Processing::default().auto_mark_read_on_view + } } #[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] +pub struct ConfMigration { + #[serde(default = "ConfMigration::default_auto_convert_country_filters")] + pub auto_convert_country_filters: bool, + #[serde(default = "ConfMigration::default_preserve_legacy_data")] + pub preserve_legacy_data: bool, + #[serde(default = "ConfMigration::default_migration_batch_size")] + pub migration_batch_size: u32, +} +impl ConfMigration { + fn default_auto_convert_country_filters() -> bool { + ConfMigration::default().auto_convert_country_filters + } + fn default_preserve_legacy_data() -> bool { + ConfMigration::default().preserve_legacy_data + } + fn default_migration_batch_size() -> u32 { + ConfMigration::default().migration_batch_size + } +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +#[serde(default)] pub struct Cli { + #[serde(default)] pub default_output: DefaultOutput, + #[serde(default = "Cli::default_pager_command")] pub pager_command: String, + #[serde(default = "Cli::default_show_progress")] pub show_progress: bool, + #[serde(default = "Cli::default_auto_confirm_bulk")] pub auto_confirm_bulk: bool, + #[serde(default = "Cli::default_show_geographic_hierarchy")] pub show_geographic_hierarchy: bool, } +impl Cli { + fn default_pager_command() -> String { + Cli::default().pager_command + } + fn default_show_progress() -> bool { + Cli::default().show_progress + } + fn default_auto_confirm_bulk() -> bool { + Cli::default().auto_confirm_bulk + } + fn default_show_geographic_hierarchy() -> bool { + Cli::default().show_geographic_hierarchy + } +} + +impl Default for 
ContentQuality { + fn default() -> Self { + Self { + min_content_length: 100, + max_content_length: 500_000, + min_text_html_ratio: 0.3, + readability_threshold: 0.6, + } + } +} + +impl Default for DuplicateDetection { + fn default() -> Self { + Self { + enabled: true, + title_similarity_threshold: 0.90, + content_similarity_threshold: 0.85, + check_historical_days: 30, + store_fingerprints: true, + } + } +} + +impl Default for AdPrevention { + fn default() -> Self { + Self { + enabled: true, + block_iframes: true, + clean_content: true, + ad_patterns: vec![ + "sponsored-content".to_string(), + "advertisement".to_string(), + "promoted-post".to_string(), + "partner-content".to_string(), + "ad-wrapper".to_string(), + "sponsored".to_string(), + ], + preserved_elements: vec![ + "article".to_string(), + "p".to_string(), + "h1".to_string(), + "h2".to_string(), + "h3".to_string(), + "img".to_string(), + "figure".to_string(), + "figcaption".to_string(), + ], + removed_elements: vec![ + "aside".to_string(), + "iframe".to_string(), + "script".to_string(), + "style".to_string(), + "ins".to_string(), + "form".to_string(), + "button".to_string(), + ".ad".to_string(), + "#ad".to_string(), + "[class*='ad-']".to_string(), + "[id*='ad-']".to_string(), + ], + } + } +} impl DefaultView { pub fn display_name(&self) -> &str { @@ -364,6 +739,9 @@ impl Default for Scraping { max_content_length: 100_000, respect_robots_txt: true, rate_limit_delay_ms: 1000, + content_quality: ContentQuality::default(), + duplicate_detection: DuplicateDetection::default(), + ad_prevention: AdPrevention::default(), } } }