From 16167d18ff7d9cacc2e4af12bd56df79c1412836 Mon Sep 17 00:00:00 2001 From: vikingowl Date: Wed, 20 Aug 2025 08:51:38 +0200 Subject: [PATCH] [feat] modularized backend with plugin architecture, added `module-api`, `module-host`, and `summarizer` crates, and integrated dynamic module loading into `main.rs` --- backend-rust/Cargo.lock | 62 ++++++++++ backend-rust/Cargo.toml | 14 ++- backend-rust/TODO.md | 72 +++++++++++ backend-rust/crates/app/Cargo.toml | 7 +- backend-rust/crates/app/src/main.rs | 38 +++++- backend-rust/crates/module-api/Cargo.toml | 12 ++ backend-rust/crates/module-api/src/lib.rs | 30 +++++ backend-rust/crates/module-host/Cargo.toml | 17 +++ backend-rust/crates/module-host/src/lib.rs | 114 ++++++++++++++++++ .../crates/modules/summarizer/Cargo.toml | 14 +++ .../crates/modules/summarizer/src/lib.rs | 50 ++++++++ 11 files changed, 420 insertions(+), 10 deletions(-) create mode 100644 backend-rust/TODO.md create mode 100644 backend-rust/crates/module-api/Cargo.toml create mode 100644 backend-rust/crates/module-api/src/lib.rs create mode 100644 backend-rust/crates/module-host/Cargo.toml create mode 100644 backend-rust/crates/module-host/src/lib.rs create mode 100644 backend-rust/crates/modules/summarizer/Cargo.toml create mode 100644 backend-rust/crates/modules/summarizer/src/lib.rs diff --git a/backend-rust/Cargo.lock b/backend-rust/Cargo.lock index c44f4b2..4280232 100644 --- a/backend-rust/Cargo.lock +++ b/backend-rust/Cargo.lock @@ -797,6 +797,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -1191,6 +1197,16 @@ version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +[[package]] +name = "libloading" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" +dependencies = [ + "cfg-if", + "windows-targets 0.52.6", +] + [[package]] name = "libm" version = "0.2.15" @@ -1436,6 +1452,16 @@ dependencies = [ "libm", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "object" version = "0.36.7" @@ -1506,7 +1532,10 @@ name = "owly-news" version = "0.1.0" dependencies = [ "anyhow", + "num_cpus", "owly-news-api", + "owly-news-module-host", + "serde_json", "tokio", "tracing", "tracing-subscriber", @@ -1535,6 +1564,39 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "owly-news-module-api" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "serde_json", +] + +[[package]] +name = "owly-news-module-host" +version = "0.1.0" +dependencies = [ + "anyhow", + "libloading", + "once_cell", + "owly-news-module-api", + "serde", + "serde_json", + "tokio", + "tracing", +] + +[[package]] +name = "owly-news-module-summarizer" +version = "0.1.0" +dependencies = [ + "anyhow", + "owly-news-module-api", + "serde", + "serde_json", +] + [[package]] name = "parking" version = "2.2.1" diff --git a/backend-rust/Cargo.toml b/backend-rust/Cargo.toml index cf49a90..a1d4b88 100644 --- a/backend-rust/Cargo.toml +++ b/backend-rust/Cargo.toml @@ -1,5 +1,11 @@ [workspace] -members = ["crates/app", "crates/api"] +members = [ + "crates/app", + "crates/api", + "crates/module-api", + "crates/module-host", + "crates/modules/summarizer", +] resolver = "2" [workspace.package] @@ -22,7 +28,11 @@ sha2 = "0.10.9" hex = "0.4.3" readability = "0.3.0" scraper = "0.23.1" +libloading = "0.8" +async-trait = "0.1" +once_cell = "1.19" +num_cpus = "1.16" -# Dev-only deps centralized (optional; you can also keep these inside each member) +# Dev-only deps centralized (optional) tokio-test = "0.4" axum-test = "17.3" diff --git a/backend-rust/TODO.md b/backend-rust/TODO.md new file mode 100644 index 0000000..bf1c298 --- /dev/null +++ b/backend-rust/TODO.md @@ -0,0 +1,72 @@ +## CPU and resource limiting +- Tokio worker threads + - Decide thread policy: + - Option A: set TOKIO_WORKER_THREADS in the environment for deployments. + - Option B: build a custom runtime with tokio::runtime::Builder::new_multi_thread().worker_threads(n). + + - Document your default policy (e.g., 50% of physical cores). + +- Concurrency guard for CPU-heavy tasks + - Create a global tokio::sync::Semaphore with N permits (N = allowed concurrent heavy tasks). + - Acquire a permit before invoking heavy module operations; release automatically on drop. + - Expose the semaphore in app state so handlers/jobs can share it. + +- HTTP backpressure and rate limiting (if using API) + - Add tower::limit::ConcurrencyLimitLayer to cap in-flight requests. + - Add tower::limit::RateLimitLayer or request-size/timeouts as needed. + - Optionally add tower::timeout::TimeoutLayer to bound handler latency. + +- Stronger isolation (optional, later) + - Evaluate running certain modules as separate processes for strict CPU caps. + - Use cgroups v2 (Linux) or Job Objects (Windows) to bound CPU/memory per process. + - Reuse the same JSON interface over IPC (e.g., stdio or a local socket). + +## Build and run +- Build all crates + - Run: cargo build --workspace + +- Build each plugin as cdylib + - Example: cd crates/modules/summarizer && cargo build --release + +- Stage plugin libraries for the host to find + - Create a modules directory the daemon will read, e.g. target/modules + - Copy the built artifact into that directory: + - Linux: copy target/release/libsummarizer.so -> target/modules/libsummarizer.so + - macOS: copy target/release/libsummarizer.dylib -> target/modules/libsummarizer.dylib + - Windows: copy target/release/summarizer.dll -> target/modules/summarizer.dll + + - Alternatively set OWLY_MODULES_DIR to your chosen directory. + +- Run the daemon + - cargo run -p owly-news + - Optionally set: + - OWLY_MODULES_DIR=/absolute/path/to/modules + - TOKIO_WORKER_THREADS=N + +## Wire into the API +- Share ModuleHost in app state + - Create a struct AppState { host: Arc, cpu_sem: Arc , ... }. + - Add AppState to Axum with .with_state(state). + +- In a handler (example: POST /summarize) + - Parse payload as JSON. + - Acquire a permit from cpu_sem before heavy work. + - host.get("summarizer").await? to lazily load the module. + - Call module.invoke_json("summarize", payload_value)?. + - Map success to 200 with JSON; map errors to appropriate status codes. + +- Error handling and observability + - Use thiserror/anyhow to classify operational vs. client errors. + - Add tracing spans around module loading and invocation; include module name and op. + - Return structured error JSON when module reports an error. + +- Configuration + - Decide env vars and defaults: OWLY_MODULES_DIR, TOKIO_WORKER_THREADS, concurrency permits, rate limits. + - Optionally add a config file (toml) and load via figment or config crate. + +- Health and lifecycle + - Add a /health route that checks: + - Tokio is responsive. + - Optional: preflight-check that required modules are present (or skip to keep lazy). + + - Graceful shutdown: listen for SIGINT/SIGTERM and drain in-flight requests before exit. diff --git a/backend-rust/crates/app/Cargo.toml b/backend-rust/crates/app/Cargo.toml index 4e2a220..0f88f4e 100644 --- a/backend-rust/crates/app/Cargo.toml +++ b/backend-rust/crates/app/Cargo.toml @@ -5,7 +5,10 @@ edition.workspace = true [dependencies] owly-news-api = { path = "../api" } -tokio = { workspace = true, features = ["full"] } +owly-news-module-host = { path = "../module-host" } +tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] } tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter", "json"] } -anyhow = "1.0.99" +anyhow = { workspace = true } +serde_json = { workspace = true } +num_cpus = { workspace = true } diff --git a/backend-rust/crates/app/src/main.rs b/backend-rust/crates/app/src/main.rs index de4c184..50f9c30 100644 --- a/backend-rust/crates/app/src/main.rs +++ b/backend-rust/crates/app/src/main.rs @@ -1,8 +1,8 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; -#[tokio::main] +#[tokio::main(flavor = "multi_thread")] async fn main() -> anyhow::Result<()> { - // Basic tracing setup (adjust as needed) + // Tracing setup tracing_subscriber::registry() .with( tracing_subscriber::EnvFilter::try_from_default_env() @@ -11,9 +11,35 @@ async fn main() -> anyhow::Result<()> { .with(tracing_subscriber::fmt::layer()) .init(); - // TODO: invoke your API server bootstrap here. - // For example, if you have a function like `owly-news-api::run_server().await` - // call it here. This is just a placeholder: - tracing::info!("owly-news app starting..."); + // Limit worker threads for CPU control (can be tuned via env) + // Note: When using #[tokio::main], configure via env TOKIO_WORKER_THREADS. + // Alternatively, build a Runtime manually for stricter control. + if let Ok(threads) = std::env::var("TOKIO_WORKER_THREADS") { + tracing::warn!( + "TOKIO_WORKER_THREADS is set to {threads}, ensure it matches deployment requirements" + ); + } else { + // Provide a sane default via env if not set + let default_threads = std::cmp::max(1, num_cpus::get_physical() / 2); + unsafe { std::env::set_var("TOKIO_WORKER_THREADS", default_threads.to_string()); } + tracing::info!("Defaulting worker threads to {}", default_threads); + } + + // Example: lazily load and invoke the "summarizer" module when needed + let host = owly_news_module_host::ModuleHost::default(); + + // Simulate an on-demand call (e.g., from an HTTP handler) + let summarizer = host.get("summarizer").await?; + let resp = summarizer.invoke_json( + "summarize", + serde_json::json!({ + "text": "Rust enables fearless concurrency with strong guarantees over memory safety.", + "ratio": 0.3 + }), + )?; + tracing::info!(?resp, "summarizer response"); + + // TODO: wire this into your API routes/handlers, using the host.get("").await when needed. + tracing::info!("owly-news daemon running"); Ok(()) } diff --git a/backend-rust/crates/module-api/Cargo.toml b/backend-rust/crates/module-api/Cargo.toml new file mode 100644 index 0000000..5fd31d3 --- /dev/null +++ b/backend-rust/crates/module-api/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "owly-news-module-api" +version.workspace = true +edition.workspace = true + +[lib] +path = "src/lib.rs" + +[dependencies] +anyhow = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } diff --git a/backend-rust/crates/module-api/src/lib.rs b/backend-rust/crates/module-api/src/lib.rs new file mode 100644 index 0000000..aa9c52f --- /dev/null +++ b/backend-rust/crates/module-api/src/lib.rs @@ -0,0 +1,30 @@ +use std::ffi::{CStr, CString}; +use std::os::raw::c_char; + +// Symbols every module must export with `extern "C"` and `#[no_mangle]`. +// Signature: fn module_name() -> *const c_char +// Signature: fn module_invoke(op: *const c_char, payload: *const c_char) -> *mut c_char +pub const SYMBOL_NAME: &str = "module_name"; +pub const SYMBOL_INVOKE: &str = "module_invoke"; + +// Helper to convert C char* to &str +pub unsafe fn cstr_to_str<'a>(ptr: *const c_char) -> anyhow::Result<&'a str> { + if ptr.is_null() { + anyhow::bail!("null pointer"); + } + Ok(CStr::from_ptr(ptr).to_str()?) +} + +// Helper to allocate a CString for return across FFI boundary (module side) +pub fn string_to_cstring_ptr(s: String) -> *mut c_char { + CString::new(s).unwrap().into_raw() +} + +// Helper to take back ownership of a CString (host side), then free by letting CString drop +pub unsafe fn take_cstring(ptr: *mut c_char) -> anyhow::Result { + if ptr.is_null() { + anyhow::bail!("null pointer"); + } + let s = CString::from_raw(ptr); + Ok(s.into_string()?) +} diff --git a/backend-rust/crates/module-host/Cargo.toml b/backend-rust/crates/module-host/Cargo.toml new file mode 100644 index 0000000..efe26ed --- /dev/null +++ b/backend-rust/crates/module-host/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "owly-news-module-host" +version.workspace = true +edition.workspace = true + +[lib] +path = "src/lib.rs" + +[dependencies] +anyhow = { workspace = true } +libloading = { workspace = true } +once_cell = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros", "sync"] } +tracing = { workspace = true } +owly-news-module-api = { path = "../module-api" } diff --git a/backend-rust/crates/module-host/src/lib.rs b/backend-rust/crates/module-host/src/lib.rs new file mode 100644 index 0000000..3d02169 --- /dev/null +++ b/backend-rust/crates/module-host/src/lib.rs @@ -0,0 +1,114 @@ +use anyhow::Context; +use libloading::{Library, Symbol}; +use once_cell::sync::OnceCell; +use owly_news_module_api::{take_cstring, SYMBOL_INVOKE, SYMBOL_NAME}; +use std::collections::HashMap; +use std::ffi::CString; +use std::os::raw::c_char; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use tokio::sync::Mutex; +use tracing::info; + +type ModuleNameFn = unsafe extern "C" fn() -> *const c_char; +type ModuleInvokeFn = unsafe extern "C" fn(*const c_char, *const c_char) -> *mut c_char; + +pub struct ModuleHandle { + _lib: Arc, + invoke: ModuleInvokeFn, +} + +impl ModuleHandle { + pub fn invoke_json(&self, op: &str, payload: serde_json::Value) -> anyhow::Result { + let op_c = CString::new(op)?; + let payload_c = CString::new(serde_json::to_string(&payload)?)?; + + let out_ptr = unsafe { (self.invoke)(op_c.as_ptr(), payload_c.as_ptr()) }; + let out = unsafe { take_cstring(out_ptr) }?; + let val = serde_json::from_str(&out).context("module returned invalid JSON")?; + Ok(val) + } +} + +pub struct ModuleHost { + // Lazy cache of loaded modules by logical name + loaded: Mutex>>, + modules_dir: PathBuf, +} + +static DEFAULT_HOST: OnceCell> = OnceCell::new(); + +impl ModuleHost { + pub fn default() -> Arc { + DEFAULT_HOST + .get_or_init(|| { + Arc::new(Self::new( + std::env::var_os("OWLY_MODULES_DIR") + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("target/modules")), // default location + )) + }) + .clone() + } + + pub fn new(modules_dir: PathBuf) -> Self { + Self { + loaded: Mutex::new(HashMap::new()), + modules_dir, + } + } + + pub async fn get(&self, name: &str) -> anyhow::Result> { + if let Some(h) = self.loaded.lock().await.get(name).cloned() { + return Ok(h); + } + let handle = Arc::new(self.load_module(name)?); + self.loaded.lock().await.insert(name.to_string(), handle.clone()); + Ok(handle) + } + + fn load_module(&self, name: &str) -> anyhow::Result { + let lib_path = resolve_module_path(&self.modules_dir, name)?; + info!(module = name, path = %lib_path.display(), "loading module"); + + // SAFETY: we keep Library alive in ModuleHandle to ensure symbols remain valid + let lib = unsafe { Library::new(lib_path) }.with_context(|| "failed to load module library")?; + + // Validate and bind symbols + let name_fn: Symbol = unsafe { lib.get(SYMBOL_NAME.as_bytes()) } + .with_context(|| "missing symbol `module_name`")?; + let invoke_fn: Symbol = unsafe { lib.get(SYMBOL_INVOKE.as_bytes()) } + .with_context(|| "missing symbol `module_invoke`")?; + + // Optional: verify reported name matches requested + let c_name_ptr = unsafe { name_fn() }; + let c_name = unsafe { std::ffi::CStr::from_ptr(c_name_ptr) }.to_string_lossy().into_owned(); + if c_name != name { + anyhow::bail!("module reported name `{c_name}`, expected `{name}`"); + } + + // Copy the function pointer before moving the library + let invoke_fn_copy = *invoke_fn; + + Ok(ModuleHandle { + _lib: Arc::new(lib), + invoke: invoke_fn_copy, + }) + } +} + +fn resolve_module_path(dir: &Path, name: &str) -> anyhow::Result { + #[cfg(target_os = "windows")] + const EXT: &str = "dll"; + #[cfg(target_os = "macos")] + const EXT: &str = "dylib"; + #[cfg(all(unix, not(target_os = "macos")))] + const EXT: &str = "so"; + + let fname = format!("lib{name}.{EXT}"); + let path = dir.join(fname); + if !path.exists() { + anyhow::bail!("module `{name}` not found at {}", path.display()); + } + Ok(path) +} diff --git a/backend-rust/crates/modules/summarizer/Cargo.toml b/backend-rust/crates/modules/summarizer/Cargo.toml new file mode 100644 index 0000000..84859e8 --- /dev/null +++ b/backend-rust/crates/modules/summarizer/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "owly-news-module-summarizer" +version.workspace = true +edition.workspace = true + +[lib] +crate-type = ["cdylib"] +path = "src/lib.rs" + +[dependencies] +anyhow = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +owly-news-module-api = { path = "../../module-api" } diff --git a/backend-rust/crates/modules/summarizer/src/lib.rs b/backend-rust/crates/modules/summarizer/src/lib.rs new file mode 100644 index 0000000..71d57b4 --- /dev/null +++ b/backend-rust/crates/modules/summarizer/src/lib.rs @@ -0,0 +1,50 @@ +use owly_news_module_api::{cstr_to_str, string_to_cstring_ptr}; +use serde::{Deserialize, Serialize}; +use std::os::raw::c_char; + +#[derive(Deserialize)] +struct SummarizeReq { + text: String, + #[serde(default = "default_ratio")] + ratio: f32, +} + +fn default_ratio() -> f32 { 0.2 } + +#[derive(Serialize)] +struct SummarizeResp { + summary: String, +} + +#[unsafe(no_mangle)] +pub extern "C" fn module_name() -> *const c_char { + // IMPORTANT: string must live forever; use a const C string + static NAME: &str = "summarizer\0"; + NAME.as_ptr() as *const c_char +} + +#[unsafe(no_mangle)] +pub extern "C" fn module_invoke(op: *const c_char, payload: *const c_char) -> *mut c_char { + // SAFETY: called by trusted host with valid pointers + let res = (|| -> anyhow::Result { + let op = unsafe { cstr_to_str(op)? }; + let payload = unsafe { cstr_to_str(payload)? }; + + match op { + "summarize" => { + let req: SummarizeReq = serde_json::from_str(payload)?; + // Placeholder summarization logic. Replace with real algorithm. + let words: Vec<&str> = req.text.split_whitespace().collect(); + let take = ((words.len() as f32) * req.ratio).max(1.0).round() as usize; + let summary = words.into_iter().take(take).collect::>().join(" "); + let resp = SummarizeResp { summary }; + Ok(serde_json::to_string(&resp)?) + } + _ => anyhow::bail!("unknown op: {op}"), + } + })(); + + let json = res.unwrap_or_else(|e| serde_json::json!({ "error": e.to_string() }).to_string()); + + string_to_cstring_ptr(json) +}