Compare commits

...

4 Commits

22 changed files with 999 additions and 753 deletions

33
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,33 @@
# Continuous integration: formatting, lints (warnings are errors) and tests.
name: CI
# Runs on pushes to, and pull requests targeting, the dev and main branches.
on:
  push:
    branches: [ dev, main ]
  pull_request:
    branches: [ dev, main ]
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Set up Rust
        uses: dtolnay/rust-toolchain@stable
      - name: Cache cargo registry and target
        uses: Swatinem/rust-cache@v2
      - name: Install components
        run: rustup component add clippy rustfmt
      - name: Cargo fmt
        run: cargo fmt --all -- --check
      - name: Clippy
        # First step that resolves dependencies: --locked here makes a stale
        # Cargo.lock fail the build instead of being silently rewritten before
        # the test step gets a chance to check it.
        run: cargo clippy --workspace --all-targets --locked -- -D warnings
      - name: Test
        # `--all` is a deprecated alias for `--workspace`, so only one is passed.
        run: cargo test --workspace --locked

14
CHANGELOG.md Normal file
View File

@@ -0,0 +1,14 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on Keep a Changelog, and this project adheres to Semantic Versioning.
## Unreleased
### Changed
- Docs: Replace `--download-models`/`--update-models` flags with `models download`/`models update` subcommands in `README.md`, `docs/usage.md`, and `docs/development.md`.
- Host: Plugin discovery now scans `$XDG_DATA_HOME/polyscribe/plugins` (platform equivalent via `directories`) in addition to `PATH`.
- CI: Add GitHub Actions workflow to run fmt, clippy (warnings as errors), and tests for pushes and PRs.

83
Cargo.lock generated
View File

@@ -97,6 +97,22 @@ version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
[[package]]
name = "assert_cmd"
version = "2.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bd389a4b2970a01282ee455294913c0a43724daedcd1a24c3eb0ec1c1320b66"
dependencies = [
"anstyle",
"bstr",
"doc-comment",
"libc",
"predicates",
"predicates-core",
"predicates-tree",
"wait-timeout",
]
[[package]]
name = "async-compression"
version = "0.4.27"
@@ -172,6 +188,17 @@ dependencies = [
"generic-array",
]
[[package]]
name = "bstr"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"regex-automata 0.4.9",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.19.0"
@@ -374,6 +401,12 @@ dependencies = [
"typenum",
]
[[package]]
name = "difflib"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
[[package]]
name = "digest"
version = "0.10.7"
@@ -416,6 +449,12 @@ dependencies = [
"syn",
]
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "either"
version = "1.15.0"
@@ -1113,6 +1152,7 @@ name = "polyscribe-cli"
version = "0.1.0"
dependencies = [
"anyhow",
"assert_cmd",
"clap",
"clap_complete",
"clap_mangen",
@@ -1153,6 +1193,7 @@ name = "polyscribe-host"
version = "0.1.0"
dependencies = [
"anyhow",
"directories",
"serde",
"serde_json",
"tokio",
@@ -1191,6 +1232,33 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "predicates"
version = "3.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573"
dependencies = [
"anstyle",
"difflib",
"predicates-core",
]
[[package]]
name = "predicates-core"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa"
[[package]]
name = "predicates-tree"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c"
dependencies = [
"predicates-core",
"termtree",
]
[[package]]
name = "prettyplease"
version = "0.2.36"
@@ -1699,6 +1767,12 @@ dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "termtree"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
[[package]]
name = "textwrap"
version = "0.16.2"
@@ -2055,6 +2129,15 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wait-timeout"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11"
dependencies = [
"libc",
]
[[package]]
name = "want"
version = "0.3.1"

149
README.md
View File

@@ -1,121 +1,68 @@
# PolyScribe
PolyScribe is a fast, local-first CLI for transcribing audio/video and merging existing JSON transcripts. It uses whisper-rs under the hood, can discover and download Whisper models automatically, and supports CPU and optional GPU backends (CUDA, ROCm/HIP, Vulkan).
Local-first transcription and plugins.
Key features
- Transcribe audio and common video files using ffmpeg for audio extraction.
- Merge multiple JSON transcripts, or merge and also keep per-file outputs.
- Model management: interactive downloader and non-interactive updater with hash verification.
- GPU backend selection at runtime; auto-detects available accelerators.
- Clean outputs (JSON and SRT), speaker naming prompts, and useful logging controls.
## Features
Prerequisites
- Rust toolchain (rustup recommended)
- ffmpeg available on PATH
- Optional for GPU acceleration at runtime: CUDA, ROCm/HIP, or Vulkan drivers (match your build features)
- **Local-first**: Works offline with downloaded models
- **Multiple backends**: CPU, CUDA, ROCm/HIP, and Vulkan support
- **Plugin system**: Extensible via JSON-RPC plugins
- **Model management**: Automatic download and verification of Whisper models
- **Manifest caching**: Local cache for Hugging Face model manifests to reduce network requests
Installation
- Build from source (CPU-only by default):
- rustup install stable
- rustup default stable
- cargo build --release
- Binary path: ./target/release/polyscribe
- GPU builds (optional): build with features
- CUDA: cargo build --release --features gpu-cuda
- HIP: cargo build --release --features gpu-hip
- Vulkan: cargo build --release --features gpu-vulkan
## Model Management
Quickstart
1) Download a model (first run can prompt you):
- ./target/release/polyscribe --download-models
- In the interactive picker, use Up/Down to navigate, Space to toggle selections, and Enter to confirm. Models are grouped by base (e.g., tiny, base, small).
PolyScribe automatically manages Whisper models from Hugging Face:
2) Transcribe a file:
- ./target/release/polyscribe -v -o output my_audio.mp3
This writes JSON and SRT into the output directory with a date prefix.
```bash
# Download models interactively
polyscribe models download
Shell completions and man page
- Completions: ./target/release/polyscribe completions <bash|zsh|fish|powershell|elvish> > polyscribe.<ext>
- Then install into your shell's completion directory.
- Man page: ./target/release/polyscribe man > polyscribe.1 (then copy to your manpath)
# Update existing models
polyscribe models update
Model locations
- Development (debug builds): ./models next to the project.
- Packaged/release builds: $XDG_DATA_HOME/polyscribe/models or ~/.local/share/polyscribe/models.
- Override via env var: POLYSCRIBE_MODELS_DIR=/path/to/models.
- Force a specific model file via env var: WHISPER_MODEL=/path/to/model.bin.
# Clear manifest cache (force fresh fetch)
polyscribe models clear-cache
```
Most-used CLI flags
- -o, --output FILE_OR_DIR: Output path base (date prefix added). If omitted, JSON prints to stdout.
- -m, --merge: Merge all inputs into one output; otherwise one output per input.
- --merge-and-separate: Write both merged output and separate per-input outputs (requires -o dir).
- --set-speaker-names: Prompt for a speaker label per input file.
- --update-models: Verify/update local models by size/hash against the upstream manifest.
- --download-models: Interactive model list + multi-select download.
- --language LANG: Language code hint (e.g., en, de). English-only models reject non-en hints.
- --gpu-backend [auto|cpu|cuda|hip|vulkan]: Select backend (auto by default).
- --gpu-layers N: Offload N layers to GPU when supported.
- -v/--verbose (repeatable): Increase log verbosity. -vv shows very detailed logs.
- -q/--quiet: Suppress non-error logs (stderr); does not silence stdout results.
- --no-interaction: Never prompt; suitable for CI.
### Manifest Caching
Minimal usage examples
- Transcribe an audio file to JSON/SRT:
- ./target/release/polyscribe -o output samples/podcast_clip.mp3
- Merge multiple transcripts into one:
- ./target/release/polyscribe -m -o output merged input/a.json input/b.json
- Update local models non-interactively (good for CI):
- ./target/release/polyscribe --update-models --no-interaction -q
- Download models interactively:
- ./target/release/polyscribe --download-models
The Hugging Face model manifest is cached locally to avoid repeated network requests:
Troubleshooting & docs
- docs/faq.md — common issues and solutions (missing ffmpeg, GPU selection, model paths)
- docs/usage.md — complete CLI reference and workflows
- docs/development.md — build, run, and contribute locally
- docs/design.md — architecture overview and decisions
- docs/release-packaging.md — packaging notes for distributions
- CONTRIBUTING.md — PR checklist and CI workflow
- **Default TTL**: 24 hours
- **Cache location**: `$XDG_CACHE_HOME/polyscribe/manifest/` (or platform equivalent)
- **Environment variables**:
- `POLYSCRIBE_NO_CACHE_MANIFEST=1`: Disable caching
- `POLYSCRIBE_MANIFEST_TTL_SECONDS=3600`: Set custom TTL (in seconds)
CI status: [CI badge placeholder]
## Installation
License
-------
This project is licensed under the MIT License — see the LICENSE file for details.
```bash
cargo install --path .
```
---
## Usage
Workspace layout
- This repo is a Cargo workspace using resolver = "2".
- Members:
- crates/polyscribe-core — types, errors, config service, core helpers.
- crates/polyscribe-protocol — PSP/1 serde types for NDJSON over stdio.
- crates/polyscribe-host — plugin discovery/runner, progress forwarding.
- crates/polyscribe-cli — the CLI, using host + core.
- plugins/polyscribe-plugin-tubescribe — stub plugin used for verification.
```bash
# Transcribe audio/video
polyscribe transcribe input.mp4
Build and run
- Build all: cargo build --workspace --all-targets
- CLI help: cargo run -p polyscribe-cli -- --help
# Merge multiple transcripts
polyscribe transcribe --merge input1.json input2.json
Plugins
- Build and link the example plugin into your XDG data plugin dir:
- make -C plugins/polyscribe-plugin-tubescribe link
- This creates a symlink at: $XDG_DATA_HOME/polyscribe/plugins/polyscribe-plugin-tubescribe (defaults to ~/.local/share on Linux).
- Discover installed plugins:
- cargo run -p polyscribe-cli -- plugins list
- Show a plugin's capabilities:
- cargo run -p polyscribe-cli -- plugins info tubescribe
- Run a plugin command (JSON-RPC over NDJSON via stdio):
- cargo run -p polyscribe-cli -- plugins run tubescribe generate_metadata --json '{"input":{"kind":"text","summary":"hello world"}}'
# Use specific GPU backend
polyscribe transcribe --gpu-backend cuda input.mp4
```
Verification commands
- The above commands are used for acceptance; expected behavior:
- plugins list shows "tubescribe" once linked.
- plugins info tubescribe prints JSON capabilities.
- plugins run ... prints progress events and a JSON result.
## Development
Notes
- No absolute paths are hardcoded; config and plugin dirs respect XDG on Linux and platform equivalents via directories.
- Plugins must be non-interactive (no TTY prompts). All interaction stays in the host/CLI.
- Config files are written atomically and support env overrides: POLYSCRIBE__SECTION__KEY=value.
```bash
# Build
cargo build
# Run tests
cargo test
# Run with verbose logging
cargo run -- --verbose transcribe input.mp4
```

View File

@@ -3,6 +3,10 @@ name = "polyscribe-cli"
version = "0.1.0"
edition = "2024"
[[bin]]
name = "polyscribe"
path = "src/main.rs"
[dependencies]
anyhow = "1.0.99"
clap = { version = "4.5.44", features = ["derive"] }
@@ -23,3 +27,6 @@ polyscribe-protocol = { path = "../polyscribe-protocol" }
[features]
# Optional GPU-specific flags can be forwarded down to core/host if needed
default = []
[dev-dependencies]
assert_cmd = "2.0.16"

View File

@@ -11,7 +11,11 @@ pub enum GpuBackend {
}
#[derive(Debug, Parser)]
#[command(name = "polyscribe", version, about = "PolyScribe local-first transcription and plugins")]
#[command(
name = "polyscribe",
version,
about = "PolyScribe local-first transcription and plugins"
)]
pub struct Cli {
/// Increase verbosity (-v, -vv)
#[arg(short, long, action = clap::ArgAction::Count)]
@@ -25,6 +29,10 @@ pub struct Cli {
#[arg(long, default_value_t = false)]
pub no_interaction: bool,
/// Disable progress bars/spinners
#[arg(long, default_value_t = false)]
pub no_progress: bool,
#[command(subcommand)]
pub command: Commands,
}
@@ -95,6 +103,8 @@ pub enum ModelsCmd {
Update,
/// Interactive multi-select downloader
Download,
/// Clear the cached Hugging Face manifest
ClearCache,
}
#[derive(Debug, Subcommand)]
@@ -116,4 +126,4 @@ pub enum PluginsCmd {
#[arg(long)]
json: Option<String>,
},
}
}

View File

@@ -1,16 +1,16 @@
mod cli;
use anyhow::{anyhow, Context, Result};
use clap::{Parser, CommandFactory};
use anyhow::{Context, Result, anyhow};
use clap::{CommandFactory, Parser};
use cli::{Cli, Commands, GpuBackend, ModelsCmd, PluginsCmd};
use polyscribe_core::{config::ConfigService, ui::progress::ProgressReporter};
use polyscribe_core::models; // Added: call into core models
use polyscribe_core::models;
use polyscribe_core::ui::progress::ProgressReporter;
use polyscribe_host::PluginManager;
use tokio::io::AsyncWriteExt;
use tracing_subscriber::EnvFilter;
fn init_tracing(quiet: bool, verbose: u8) {
let level = if quiet {
let log_level = if quiet {
"error"
} else {
match verbose {
@@ -20,7 +20,7 @@ fn init_tracing(quiet: bool, verbose: u8) {
}
};
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(level));
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(log_level));
tracing_subscriber::fmt()
.with_env_filter(filter)
.with_target(false)
@@ -35,23 +35,17 @@ async fn main() -> Result<()> {
init_tracing(args.quiet, args.verbose);
// Optionally propagate quiet/no-interaction/verbosity to core if your lib exposes setters.
// polyscribe_core::set_quiet(args.quiet);
// polyscribe_core::set_no_interaction(args.no_interaction);
// polyscribe_core::set_verbose(args.verbose);
let _cfg = ConfigService::load_or_default().context("loading configuration")?;
polyscribe_core::set_quiet(args.quiet);
polyscribe_core::set_no_interaction(args.no_interaction);
polyscribe_core::set_verbose(args.verbose);
polyscribe_core::set_no_progress(args.no_progress);
match args.command {
Commands::Transcribe {
output: _output,
merge: _merge,
merge_and_separate: _merge_and_separate,
language: _language,
set_speaker_names: _set_speaker_names,
gpu_backend,
gpu_layers,
inputs,
..
} => {
polyscribe_core::ui::info("starting transcription workflow");
let mut progress = ProgressReporter::new(args.no_interaction);
@@ -80,44 +74,57 @@ async fn main() -> Result<()> {
match cmd {
ModelsCmd::Update => {
polyscribe_core::ui::info("verifying/updating local models");
tokio::task::spawn_blocking(|| models::update_local_models())
tokio::task::spawn_blocking(models::update_local_models)
.await
.map_err(|e| anyhow!("blocking task join error: {e}"))?
.context("updating models")?;
}
ModelsCmd::Download => {
polyscribe_core::ui::info("interactive model selection and download");
tokio::task::spawn_blocking(|| models::run_interactive_model_downloader())
tokio::task::spawn_blocking(models::run_interactive_model_downloader)
.await
.map_err(|e| anyhow!("blocking task join error: {e}"))?
.context("running downloader")?;
polyscribe_core::ui::success("Model download complete.");
}
ModelsCmd::ClearCache => {
polyscribe_core::ui::info("clearing manifest cache");
tokio::task::spawn_blocking(models::clear_manifest_cache)
.await
.map_err(|e| anyhow!("blocking task join error: {e}"))?
.context("clearing cache")?;
polyscribe_core::ui::success("Manifest cache cleared.");
}
}
Ok(())
}
Commands::Plugins { cmd } => {
let pm = PluginManager::default();
let plugin_manager = PluginManager;
match cmd {
PluginsCmd::List => {
let list = pm.list().context("discovering plugins")?;
let list = plugin_manager.list().context("discovering plugins")?;
for item in list {
polyscribe_core::ui::info(item.name);
}
Ok(())
}
PluginsCmd::Info { name } => {
let info = pm.info(&name).with_context(|| format!("getting info for {}", name))?;
let s = serde_json::to_string_pretty(&info)?;
polyscribe_core::ui::info(s);
let info = plugin_manager
.info(&name)
.with_context(|| format!("getting info for {}", name))?;
let info_json = serde_json::to_string_pretty(&info)?;
polyscribe_core::ui::info(info_json);
Ok(())
}
PluginsCmd::Run { name, command, json } => {
PluginsCmd::Run {
name,
command,
json,
} => {
let payload = json.unwrap_or_else(|| "{}".to_string());
let mut child = pm
let mut child = plugin_manager
.spawn(&name, &command)
.with_context(|| format!("spawning plugin {name} {command}"))?;
@@ -128,9 +135,12 @@ async fn main() -> Result<()> {
.context("writing JSON payload to plugin stdin")?;
}
let status = pm.forward_stdio(&mut child).await?;
let status = plugin_manager.forward_stdio(&mut child).await?;
if !status.success() {
polyscribe_core::ui::error(format!("plugin returned non-zero exit code: {}", status));
polyscribe_core::ui::error(format!(
"plugin returned non-zero exit code: {}",
status
));
return Err(anyhow!("plugin failed"));
}
Ok(())

View File

@@ -1,11 +1,11 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
use assert_cmd::cargo::cargo_bin;
use std::process::Command;
fn bin() -> String {
std::env::var("CARGO_BIN_EXE_polyscribe")
.unwrap_or_else(|_| "polyscribe".to_string())
fn bin() -> std::path::PathBuf {
cargo_bin("polyscribe")
}
#[test]

View File

@@ -1,12 +1,14 @@
// SPDX-License-Identifier: MIT
// Move original build.rs behavior into core crate
fn main() {
// Only run special build steps when gpu-vulkan feature is enabled.
let vulkan_enabled = std::env::var("CARGO_FEATURE_GPU_VULKAN").is_ok();
println!("cargo:rerun-if-changed=extern/whisper.cpp");
if !vulkan_enabled {
println!(
"cargo:warning=gpu-vulkan feature is disabled; skipping Vulkan-dependent build steps."
);
return;
}
println!("cargo:rerun-if-changed=extern/whisper.cpp");
println!(
"cargo:warning=Building with gpu-vulkan: ensure Vulkan SDK/loader are installed. Future versions will compile whisper.cpp via CMake."
);

View File

@@ -1,34 +1,23 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
//! Transcription backend selection and implementations (CPU/GPU) used by PolyScribe.
use crate::OutputEntry;
use crate::prelude::*;
use crate::{decode_audio_to_pcm_f32_ffmpeg, find_model_file};
use anyhow::{Context, Result, anyhow};
use anyhow::{Context, anyhow};
use std::env;
use std::path::Path;
// Re-export a public enum for CLI parsing usage
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Kind of transcription backend to use.
pub enum BackendKind {
/// Automatically detect the best available backend (CUDA > HIP > Vulkan > CPU).
Auto,
/// Pure CPU backend using whisper-rs.
Cpu,
/// NVIDIA CUDA backend (requires CUDA runtime available at load time and proper feature build).
Cuda,
/// AMD ROCm/HIP backend (requires hip/rocBLAS libraries available and proper feature build).
Hip,
/// Vulkan backend (experimental; requires Vulkan loader/SDK and feature build).
Vulkan,
}
/// Abstraction for a transcription backend.
pub trait TranscribeBackend {
/// Backend kind implemented by this type.
fn kind(&self) -> BackendKind;
/// Transcribe the given audio and return transcript entries.
fn transcribe(
&self,
audio_path: &Path,
@@ -39,15 +28,13 @@ pub trait TranscribeBackend {
) -> Result<Vec<OutputEntry>>;
}
fn check_lib(_names: &[&str]) -> bool {
fn is_library_available(_names: &[&str]) -> bool {
#[cfg(test)]
{
// During unit tests, avoid touching system libs to prevent loader crashes in CI.
false
}
#[cfg(not(test))]
{
// Disabled runtime dlopen probing to avoid loader instability; rely on environment overrides.
false
}
}
@@ -56,7 +43,7 @@ fn cuda_available() -> bool {
if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_CUDA") {
return x == "1";
}
check_lib(&[
is_library_available(&[
"libcudart.so",
"libcudart.so.12",
"libcudart.so.11",
@@ -69,33 +56,31 @@ fn hip_available() -> bool {
if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_HIP") {
return x == "1";
}
check_lib(&["libhipblas.so", "librocblas.so"])
is_library_available(&["libhipblas.so", "librocblas.so"])
}
fn vulkan_available() -> bool {
if let Ok(x) = env::var("POLYSCRIBE_TEST_FORCE_VULKAN") {
return x == "1";
}
check_lib(&["libvulkan.so.1", "libvulkan.so"])
is_library_available(&["libvulkan.so.1", "libvulkan.so"])
}
/// CPU-based transcription backend using whisper-rs.
#[derive(Default)]
pub struct CpuBackend;
/// CUDA-accelerated transcription backend for NVIDIA GPUs.
#[derive(Default)]
pub struct CudaBackend;
/// ROCm/HIP-accelerated transcription backend for AMD GPUs.
#[derive(Default)]
pub struct HipBackend;
/// Vulkan-based transcription backend (experimental/incomplete).
#[derive(Default)]
pub struct VulkanBackend;
macro_rules! impl_whisper_backend {
($ty:ty, $kind:expr) => {
impl TranscribeBackend for $ty {
fn kind(&self) -> BackendKind { $kind }
fn kind(&self) -> BackendKind {
$kind
}
fn transcribe(
&self,
audio_path: &Path,
@@ -128,29 +113,17 @@ impl TranscribeBackend for VulkanBackend {
) -> Result<Vec<OutputEntry>> {
Err(anyhow!(
"Vulkan backend not yet wired to whisper.cpp FFI. Build with --features gpu-vulkan and ensure Vulkan SDK is installed. How to fix: install Vulkan loader (libvulkan), set VULKAN_SDK, and run cargo build --features gpu-vulkan."
))
).into())
}
}
/// Result of choosing a transcription backend.
pub struct SelectionResult {
/// The constructed backend instance to perform transcription with.
pub struct BackendSelection {
pub backend: Box<dyn TranscribeBackend + Send + Sync>,
/// Which backend kind was ultimately selected.
pub chosen: BackendKind,
/// Which backend kinds were detected as available on this system.
pub detected: Vec<BackendKind>,
}
/// Select an appropriate backend based on user request and system detection.
///
/// If `requested` is `BackendKind::Auto`, the function prefers CUDA, then HIP,
/// then Vulkan, falling back to CPU when no GPU backend is detected. When a
/// specific GPU backend is requested but unavailable, an error is returned with
/// guidance on how to enable it.
///
/// Set `verbose` to true to print detection/selection info to stderr.
pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<SelectionResult> {
pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<BackendSelection> {
let mut detected = Vec::new();
if cuda_available() {
detected.push(BackendKind::Cuda);
@@ -164,11 +137,11 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
let instantiate_backend = |k: BackendKind| -> Box<dyn TranscribeBackend + Send + Sync> {
match k {
BackendKind::Cpu => Box::new(CpuBackend::default()),
BackendKind::Cuda => Box::new(CudaBackend::default()),
BackendKind::Hip => Box::new(HipBackend::default()),
BackendKind::Vulkan => Box::new(VulkanBackend::default()),
BackendKind::Auto => Box::new(CpuBackend::default()), // placeholder for Auto
BackendKind::Cpu => Box::new(CpuBackend),
BackendKind::Cuda => Box::new(CudaBackend),
BackendKind::Hip => Box::new(HipBackend),
BackendKind::Vulkan => Box::new(VulkanBackend),
BackendKind::Auto => Box::new(CpuBackend),
}
};
@@ -190,7 +163,7 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
} else {
return Err(anyhow!(
"Requested CUDA backend but CUDA libraries/devices not detected. How to fix: install NVIDIA driver + CUDA toolkit, ensure libcudart/libcublas are in loader path, and build with --features gpu-cuda."
));
).into());
}
}
BackendKind::Hip => {
@@ -199,7 +172,7 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
} else {
return Err(anyhow!(
"Requested ROCm/HIP backend but libraries/devices not detected. How to fix: install ROCm hipBLAS/rocBLAS, ensure libs are in loader path, and build with --features gpu-hip."
));
).into());
}
}
BackendKind::Vulkan => {
@@ -208,7 +181,7 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
} else {
return Err(anyhow!(
"Requested Vulkan backend but libvulkan not detected. How to fix: install Vulkan loader/SDK and build with --features gpu-vulkan."
));
).into());
}
}
BackendKind::Cpu => BackendKind::Cpu,
@@ -219,14 +192,13 @@ pub fn select_backend(requested: BackendKind, verbose: bool) -> Result<Selection
crate::dlog!(1, "Selected backend: {:?}", chosen);
}
Ok(SelectionResult {
Ok(BackendSelection {
backend: instantiate_backend(chosen),
chosen,
detected,
})
}
// Internal helper: transcription using whisper-rs with CPU/GPU (depending on build features)
#[allow(clippy::too_many_arguments)]
pub(crate) fn transcribe_with_whisper_rs(
audio_path: &Path,
@@ -235,7 +207,9 @@ pub(crate) fn transcribe_with_whisper_rs(
progress: Option<&(dyn Fn(i32) + Send + Sync)>,
) -> Result<Vec<OutputEntry>> {
let report = |p: i32| {
if let Some(cb) = progress { cb(p); }
if let Some(cb) = progress {
cb(p);
}
};
report(0);
@@ -248,21 +222,21 @@ pub(crate) fn transcribe_with_whisper_rs(
.and_then(|s| s.to_str())
.map(|s| s.contains(".en.") || s.ends_with(".en.bin"))
.unwrap_or(false);
if let Some(lang) = language {
if english_only_model && lang != "en" {
return Err(anyhow!(
"Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.",
model_path.display(),
lang
));
}
if let Some(lang) = language
&& english_only_model
&& lang != "en"
{
return Err(anyhow!(
"Selected model is English-only ({}), but a non-English language hint '{}' was provided. Please use a multilingual model or set WHISPER_MODEL.",
model_path.display(),
lang
).into());
}
let model_path_str = model_path
.to_str()
.ok_or_else(|| anyhow!("Model path not valid UTF-8: {}", model_path.display()))?;
if crate::verbose_level() < 2 {
// Some builds of whisper/ggml expect these env vars; harmless if unknown
unsafe {
std::env::set_var("GGML_LOG_LEVEL", "0");
std::env::set_var("WHISPER_PRINT_PROGRESS", "0");

View File

@@ -1,108 +1,104 @@
use crate::prelude::*;
use directories::ProjectDirs;
// SPDX-License-Identifier: MIT
use serde::{Deserialize, Serialize};
use std::{fs, path::PathBuf};
use std::env;
use std::path::PathBuf;
const ENV_PREFIX: &str = "POLYSCRIBE";
/// Configuration for the Polyscribe application
///
/// Contains paths to models and plugins directories that can be customized
/// through configuration files or environment variables.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
/// Directory path where ML models are stored
pub models_dir: Option<PathBuf>,
/// Directory path where plugins are stored
pub plugins_dir: Option<PathBuf>,
}
impl Default for Config {
fn default() -> Self {
Self {
models_dir: None,
plugins_dir: None,
}
}
}
/// Service for managing Polyscribe configuration
///
/// Provides functionality to load, save, and access configuration settings
/// from disk or environment variables.
pub struct ConfigService;
impl ConfigService {
/// Loads configuration from disk or returns default values if not found
///
/// This function attempts to read the configuration file from disk. If the file
/// doesn't exist or can't be parsed, it falls back to default values.
/// Environment variable overrides are then applied to the configuration.
pub fn load_or_default() -> Result<Config> {
let mut cfg = Self::read_disk().unwrap_or_default();
Self::apply_env_overrides(&mut cfg)?;
Ok(cfg)
/// Environment variable checked by `bypass_manifest_cache`.
pub const ENV_NO_CACHE_MANIFEST: &'static str = "POLYSCRIBE_NO_CACHE_MANIFEST";
/// Environment variable holding a custom manifest cache TTL, in seconds.
pub const ENV_MANIFEST_TTL_SECONDS: &'static str = "POLYSCRIBE_MANIFEST_TTL_SECONDS";
/// Environment variable that overrides the models directory.
pub const ENV_MODELS_DIR: &'static str = "POLYSCRIBE_MODELS_DIR";
/// Environment variable that overrides the user agent string
/// (read by both `user_agent` and `downloader_user_agent`).
pub const ENV_USER_AGENT: &'static str = "POLYSCRIBE_USER_AGENT";
/// Environment variable holding a custom HTTP timeout, in seconds.
pub const ENV_HTTP_TIMEOUT_SECS: &'static str = "POLYSCRIBE_HTTP_TIMEOUT_SECS";
/// Environment variable that overrides the Hugging Face repository name.
pub const ENV_HF_REPO: &'static str = "POLYSCRIBE_HF_REPO";
/// Environment variable that overrides the manifest cache file name.
pub const ENV_CACHE_FILENAME: &'static str = "POLYSCRIBE_MANIFEST_CACHE_FILENAME";
/// User agent returned by `user_agent` when no override is set.
pub const DEFAULT_USER_AGENT: &'static str = "polyscribe/0.1";
/// User agent returned by `downloader_user_agent` when no override is set.
pub const DEFAULT_DOWNLOADER_UA: &'static str = "polyscribe-model-downloader/1";
/// Hugging Face repository returned by `hf_repo` when no override is set.
pub const DEFAULT_HF_REPO: &'static str = "ggerganov/whisper.cpp";
/// File name of the cached manifest when no override is set.
pub const DEFAULT_CACHE_FILENAME: &'static str = "hf_manifest_whisper_cpp.json";
/// HTTP timeout in seconds when no override is set.
// NOTE(review): which requests this timeout applies to is not visible here — confirm at call sites.
pub const DEFAULT_HTTP_TIMEOUT_SECS: u64 = 8;
/// Manifest cache time-to-live in seconds when no override is set (24 hours).
pub const DEFAULT_MANIFEST_CACHE_TTL_SECONDS: u64 = 24 * 60 * 60;
/// Returns the platform-specific project directories for PolyScribe.
///
/// Yields `None` when `directories` cannot determine them.
pub fn project_dirs() -> Option<directories::ProjectDirs> {
    let (qualifier, organization, application) = ("dev", "polyscribe", "polyscribe");
    directories::ProjectDirs::from(qualifier, organization, application)
}
/// Saves the configuration to disk
///
/// This function serializes the configuration to TOML format and writes it
/// to the standard configuration directory for the application.
/// Returns an error if writing fails or if project directories cannot be determined.
pub fn save(cfg: &Config) -> Result<()> {
let Some(dirs) = Self::dirs() else {
return Err(Error::Other("unable to get project dirs".into()));
};
let cfg_dir = dirs.config_dir();
fs::create_dir_all(cfg_dir)?;
let path = cfg_dir.join("config.toml");
let s = toml::to_string_pretty(cfg)?;
fs::write(path, s)?;
Ok(())
}
fn read_disk() -> Option<Config> {
let dirs = Self::dirs()?;
let path = dirs.config_dir().join("config.toml");
let s = fs::read_to_string(path).ok()?;
toml::from_str(&s).ok()
}
fn apply_env_overrides(cfg: &mut Config) -> Result<()> {
// POLYSCRIBE__SECTION__KEY format reserved for future nested config.
if let Ok(v) = std::env::var(format!("{ENV_PREFIX}_MODELS_DIR")) {
cfg.models_dir = Some(PathBuf::from(v));
}
if let Ok(v) = std::env::var(format!("{ENV_PREFIX}_PLUGINS_DIR")) {
cfg.plugins_dir = Some(PathBuf::from(v));
}
Ok(())
}
/// Returns the standard project directories for the application
///
/// This function creates a ProjectDirs instance with the appropriate
/// organization and application names for Polyscribe.
/// Returns None if the project directories cannot be determined.
pub fn dirs() -> Option<ProjectDirs> {
ProjectDirs::from("dev", "polyscribe", "polyscribe")
}
/// Returns the default directory path for storing ML models
///
/// This function determines the standard data directory for the application
/// and appends a 'models' subdirectory to it.
/// Returns None if the project directories cannot be determined.
pub fn default_models_dir() -> Option<PathBuf> {
Self::dirs().map(|d| d.data_dir().join("models"))
Self::project_dirs().map(|d| d.data_dir().join("models"))
}
/// Returns the default directory path for storing plugins
///
/// This function determines the standard data directory for the application
/// and appends a 'plugins' subdirectory to it.
/// Returns None if the project directories cannot be determined.
pub fn default_plugins_dir() -> Option<PathBuf> {
Self::dirs().map(|d| d.data_dir().join("plugins"))
Self::project_dirs().map(|d| d.data_dir().join("plugins"))
}
/// Returns the `manifest` subdirectory of the project cache directory.
///
/// Returns `None` when `Self::project_dirs()` yields `None`.
pub fn manifest_cache_dir() -> Option<PathBuf> {
    let project = Self::project_dirs()?;
    Some(project.cache_dir().join("manifest"))
}
/// Returns `true` when the environment variable named by
/// `Self::ENV_NO_CACHE_MANIFEST` can be read, whatever its value.
pub fn bypass_manifest_cache() -> bool {
    let lookup = env::var(Self::ENV_NO_CACHE_MANIFEST);
    lookup.is_ok()
}
/// Returns the manifest cache TTL in seconds.
///
/// Reads the environment variable named by `Self::ENV_MANIFEST_TTL_SECONDS`;
/// when it is unset or does not parse as a `u64`, falls back to
/// `Self::DEFAULT_MANIFEST_CACHE_TTL_SECONDS`.
pub fn manifest_cache_ttl_seconds() -> u64 {
    match env::var(Self::ENV_MANIFEST_TTL_SECONDS) {
        Ok(raw) => raw
            .parse::<u64>()
            .unwrap_or(Self::DEFAULT_MANIFEST_CACHE_TTL_SECONDS),
        Err(_) => Self::DEFAULT_MANIFEST_CACHE_TTL_SECONDS,
    }
}
/// Returns the manifest cache file name: the value of the environment
/// variable named by `Self::ENV_CACHE_FILENAME`, or
/// `Self::DEFAULT_CACHE_FILENAME` when that variable cannot be read.
pub fn manifest_cache_filename() -> String {
    if let Ok(name) = env::var(Self::ENV_CACHE_FILENAME) {
        name
    } else {
        Self::DEFAULT_CACHE_FILENAME.to_string()
    }
}
/// Resolves the models directory.
///
/// Precedence: a non-empty value of the environment variable named by
/// `Self::ENV_MODELS_DIR`, then `cfg.models_dir` when a config is supplied
/// and the field is set, then `Self::default_models_dir()`.
pub fn models_dir(cfg: Option<&Config>) -> Option<PathBuf> {
    let from_env = env::var(Self::ENV_MODELS_DIR)
        .ok()
        .filter(|value| !value.is_empty())
        .map(PathBuf::from);
    from_env
        .or_else(|| cfg.and_then(|c| c.models_dir.clone()))
        .or_else(Self::default_models_dir)
}
/// Returns the value of the environment variable named by
/// `Self::ENV_USER_AGENT`, or `Self::DEFAULT_USER_AGENT` when it cannot be read.
pub fn user_agent() -> String {
    match env::var(Self::ENV_USER_AGENT) {
        Ok(agent) => agent,
        Err(_) => Self::DEFAULT_USER_AGENT.to_string(),
    }
}
/// Returns the value of the environment variable named by
/// `Self::ENV_USER_AGENT`, or `Self::DEFAULT_DOWNLOADER_UA` when it cannot be read.
pub fn downloader_user_agent() -> String {
    match env::var(Self::ENV_USER_AGENT) {
        Ok(agent) => agent,
        Err(_) => Self::DEFAULT_DOWNLOADER_UA.to_string(),
    }
}
/// Returns the HTTP timeout in seconds.
///
/// Reads the environment variable named by `Self::ENV_HTTP_TIMEOUT_SECS`;
/// when it is unset or does not parse as a `u64`, falls back to
/// `Self::DEFAULT_HTTP_TIMEOUT_SECS`.
pub fn http_timeout_secs() -> u64 {
    match env::var(Self::ENV_HTTP_TIMEOUT_SECS) {
        Ok(raw) => raw
            .parse::<u64>()
            .unwrap_or(Self::DEFAULT_HTTP_TIMEOUT_SECS),
        Err(_) => Self::DEFAULT_HTTP_TIMEOUT_SECS,
    }
}
/// Returns the value of the environment variable named by
/// `Self::ENV_HF_REPO`, or `Self::DEFAULT_HF_REPO` when it cannot be read.
pub fn hf_repo() -> String {
    match env::var(Self::ENV_HF_REPO) {
        Ok(repo) => repo,
        Err(_) => Self::DEFAULT_HF_REPO.to_string(),
    }
}
/// Builds the Hugging Face model API URL for `repo` by appending it,
/// unmodified, to `https://huggingface.co/api/models/`.
pub fn hf_api_base_for(repo: &str) -> String {
    format!("https://huggingface.co/api/models/{repo}")
}
/// Returns the full path of the manifest cache file: the manifest cache
/// directory joined with the manifest cache file name.
///
/// Returns `None` when `Self::manifest_cache_dir()` yields `None`.
pub fn manifest_cache_path() -> Option<PathBuf> {
    Self::manifest_cache_dir().map(|dir| dir.join(Self::manifest_cache_filename()))
}
}
/// Persisted application configuration.
///
/// Every field is optional; `Default` yields a configuration with no
/// directory set.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Config {
/// Models directory taken from the configuration, if set.
pub models_dir: Option<PathBuf>,
/// Plugins directory taken from the configuration, if set.
pub plugins_dir: Option<PathBuf>,
}

View File

@@ -1,34 +1,26 @@
use thiserror::Error;
#[derive(Debug, Error)]
/// Error types for the polyscribe-core crate.
///
/// This enum represents various error conditions that can occur during
/// operations in this crate, including I/O errors, serialization/deserialization
/// errors, HTTP errors, and environment variable access errors.
pub enum Error {
#[error("I/O error: {0}")]
/// Represents an I/O error that occurred during file or stream operations
Io(#[from] std::io::Error),
#[error("serde error: {0}")]
/// Represents a JSON serialization or deserialization error
Serde(#[from] serde_json::Error),
#[error("toml error: {0}")]
/// Represents a TOML deserialization error
Toml(#[from] toml::de::Error),
#[error("toml ser error: {0}")]
/// Represents a TOML serialization error
TomlSer(#[from] toml::ser::Error),
#[error("env var error: {0}")]
/// Represents an error that occurred during environment variable access
EnvVar(#[from] std::env::VarError),
#[error("http error: {0}")]
/// Represents an HTTP error converted from `reqwest::Error`
Http(#[from] reqwest::Error),
#[error("other: {0}")]
/// Represents a general error condition with a custom message
Other(String),
}

View File

@@ -1,18 +1,13 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
#![forbid(elided_lifetimes_in_paths)]
#![forbid(unused_must_use)]
#![deny(missing_docs)]
#![warn(clippy::all)]
//! PolyScribe library: business logic and core types.
//!
//! This crate exposes the reusable parts of the PolyScribe CLI as a library.
//! The binary entry point (main.rs) remains a thin CLI wrapper.
use std::sync::atomic::{AtomicBool, AtomicU8, Ordering};
use anyhow::{anyhow, Context, Result};
use crate::prelude::*;
use anyhow::{Context, anyhow};
use chrono::Local;
use std::env;
use std::path::{Path, PathBuf};
@@ -21,56 +16,44 @@ use std::process::Command;
#[cfg(unix)]
use libc::{O_WRONLY, close, dup, dup2, open};
// Global runtime flags. Each flag is a process-wide atomic that is read and
// written with `Ordering::Relaxed` through the accessor functions below.

/// Quiet-mode flag; starts as `false`.
static QUIET: AtomicBool = AtomicBool::new(false);
/// Non-interactive-mode flag; starts as `false`.
static NO_INTERACTION: AtomicBool = AtomicBool::new(false);
/// Verbosity level; starts at `0`.
static VERBOSE: AtomicU8 = AtomicU8::new(0);
/// Flag that disables progress indicators; starts as `false`.
static NO_PROGRESS: AtomicBool = AtomicBool::new(false);
/// Set quiet mode: when true, non-interactive logs should be suppressed.
pub fn set_quiet(enabled: bool) {
QUIET.store(enabled, Ordering::Relaxed);
}
/// Return current quiet mode state.
pub fn is_quiet() -> bool {
QUIET.load(Ordering::Relaxed)
}
/// Set non-interactive mode: when true, interactive prompts must be skipped.
pub fn set_no_interaction(enabled: bool) {
NO_INTERACTION.store(enabled, Ordering::Relaxed);
}
/// Return current non-interactive state.
pub fn is_no_interaction() -> bool {
NO_INTERACTION.load(Ordering::Relaxed)
}
/// Set verbose level (0 = normal, 1 = verbose, 2 = super-verbose)
pub fn set_verbose(level: u8) {
VERBOSE.store(level, Ordering::Relaxed);
}
/// Get current verbose level.
pub fn verbose_level() -> u8 {
VERBOSE.load(Ordering::Relaxed)
}
/// Disable interactive progress indicators (bars/spinners)
pub fn set_no_progress(enabled: bool) {
NO_PROGRESS.store(enabled, Ordering::Relaxed);
}
/// Return current no-progress state
pub fn is_no_progress() -> bool {
NO_PROGRESS.load(Ordering::Relaxed)
}
/// Report whether standard input is attached to a terminal.
///
/// Callers use this to avoid blocking on prompts when input is not interactive.
pub fn stdin_is_tty() -> bool {
    let stdin = std::io::stdin();
    std::io::IsTerminal::is_terminal(&stdin)
}
/// A guard that temporarily redirects stderr to /dev/null on Unix when quiet mode is active.
/// No-op on non-Unix or when quiet is disabled. Restores stderr on drop.
pub struct StderrSilencer {
#[cfg(unix)]
old_stderr_fd: i32,
@@ -80,7 +63,6 @@ pub struct StderrSilencer {
}
impl StderrSilencer {
/// Activate stderr silencing if quiet is set and on Unix; otherwise returns a no-op guard.
pub fn activate_if_quiet() -> Self {
if !is_quiet() {
return Self {
@@ -94,7 +76,6 @@ impl StderrSilencer {
Self::activate()
}
/// Activate stderr silencing unconditionally (used internally); no-op on non-Unix.
pub fn activate() -> Self {
#[cfg(unix)]
unsafe {
@@ -106,7 +87,6 @@ impl StderrSilencer {
devnull_fd: -1,
};
}
// Open /dev/null for writing
let devnull_cstr = std::ffi::CString::new("/dev/null").unwrap();
let devnull_fd = open(devnull_cstr.as_ptr(), O_WRONLY);
if devnull_fd < 0 {
@@ -153,7 +133,6 @@ impl Drop for StderrSilencer {
}
}
/// Run the given closure with stderr temporarily silenced (Unix-only). Returns the closure result.
pub fn with_suppressed_stderr<F, T>(f: F) -> T
where
F: FnOnce() -> T,
@@ -164,13 +143,11 @@ where
result
}
/// Log an error line (always printed).
#[macro_export]
macro_rules! elog {
($($arg:tt)*) => {{ $crate::ui::error(format!($($arg)*)); }}
}
/// Log an informational line using the UI helper unless quiet mode is enabled.
#[macro_export]
macro_rules! ilog {
($($arg:tt)*) => {{
@@ -178,7 +155,6 @@ macro_rules! ilog {
}}
}
/// Log a debug/trace line when verbose level is at least the given level (u8).
#[macro_export]
macro_rules! dlog {
($lvl:expr, $($arg:tt)*) => {{
@@ -186,44 +162,28 @@ macro_rules! dlog {
}}
}
/// Backward-compatibility: map old qlog! to ilog!
#[macro_export]
macro_rules! qlog {
($($arg:tt)*) => {{ $crate::ilog!($($arg)*); }}
}
pub mod backend;
pub mod models;
/// Configuration handling for PolyScribe
pub mod config;
// Use the file-backed ui.rs module, which also declares its own `progress` submodule.
pub mod ui;
/// Error definitions for the PolyScribe library
pub mod models;
pub mod error;
pub mod ui;
pub use error::Error;
pub mod prelude;
/// Transcript entry for a single segment.
#[derive(Debug, serde::Serialize, Clone)]
pub struct OutputEntry {
/// Sequential id in output ordering.
pub id: u64,
/// Speaker label associated with the segment.
pub speaker: String,
/// Start time in seconds.
pub start: f64,
/// End time in seconds.
pub end: f64,
/// Text content.
pub text: String,
}
/// Return the current local date formatted as `YYYY-MM-DD`, for use as an
/// output file name prefix.
pub fn date_prefix() -> String {
    let today = Local::now();
    today.format("%Y-%m-%d").to_string()
}
/// Format a floating-point number of seconds as SRT timestamp (HH:MM:SS,mmm).
pub fn format_srt_time(seconds: f64) -> String {
let total_ms = (seconds * 1000.0).round() as i64;
let ms = total_ms % 1000;
@@ -234,7 +194,6 @@ pub fn format_srt_time(seconds: f64) -> String {
format!("{hour:02}:{min:02}:{sec:02},{ms:03}")
}
/// Render a list of transcript entries to SRT format.
pub fn render_srt(entries: &[OutputEntry]) -> String {
let mut srt = String::new();
for (index, entry) in entries.iter().enumerate() {
@@ -255,7 +214,6 @@ pub fn render_srt(entries: &[OutputEntry]) -> String {
srt
}
/// Determine the default models directory, honoring POLYSCRIBE_MODELS_DIR override.
pub fn models_dir_path() -> PathBuf {
if let Ok(env_val) = env::var("POLYSCRIBE_MODELS_DIR") {
let env_path = PathBuf::from(env_val);
@@ -266,24 +224,23 @@ pub fn models_dir_path() -> PathBuf {
if cfg!(debug_assertions) {
return PathBuf::from("models");
}
if let Ok(xdg) = env::var("XDG_DATA_HOME") {
if !xdg.is_empty() {
return PathBuf::from(xdg).join("polyscribe").join("models");
}
if let Ok(xdg) = env::var("XDG_DATA_HOME")
&& !xdg.is_empty()
{
return PathBuf::from(xdg).join("polyscribe").join("models");
}
if let Ok(home) = env::var("HOME") {
if !home.is_empty() {
return PathBuf::from(home)
.join(".local")
.join("share")
.join("polyscribe")
.join("models");
}
if let Ok(home) = env::var("HOME")
&& !home.is_empty()
{
return PathBuf::from(home)
.join(".local")
.join("share")
.join("polyscribe")
.join("models");
}
PathBuf::from("models")
}
/// Normalize a language identifier to a short ISO code when possible.
pub fn normalize_lang_code(input: &str) -> Option<String> {
let mut lang = input.trim().to_lowercase();
if lang.is_empty() || lang == "auto" || lang == "c" || lang == "posix" {
@@ -355,47 +312,48 @@ pub fn normalize_lang_code(input: &str) -> Option<String> {
Some(code.to_string())
}
/// Find the Whisper model file path to use.
pub fn find_model_file() -> Result<PathBuf> {
// 1) Explicit override via environment
if let Ok(path) = env::var("WHISPER_MODEL") {
let p = PathBuf::from(path);
if !p.exists() {
return Err(anyhow!(
"WHISPER_MODEL points to a non-existing path: {}",
p.display()
));
)
.into());
}
if !p.is_file() {
return Err(anyhow!(
"WHISPER_MODEL must point to a file, but is not: {}",
p.display()
));
)
.into());
}
return Ok(p);
}
// 2) Resolve models directory and ensure it exists and is a directory
let models_dir = models_dir_path();
if models_dir.exists() && !models_dir.is_dir() {
return Err(anyhow!(
"Models path exists but is not a directory: {}",
models_dir.display()
));
)
.into());
}
std::fs::create_dir_all(&models_dir).with_context(|| {
format!("Failed to ensure models dir exists: {}", models_dir.display())
format!(
"Failed to ensure models dir exists: {}",
models_dir.display()
)
})?;
// 3) Gather candidate .bin files (regular files only), prefer largest
let mut candidates = Vec::new();
for entry in std::fs::read_dir(&models_dir).with_context(|| {
format!("Failed to read models dir: {}", models_dir.display())
})? {
for entry in std::fs::read_dir(&models_dir)
.with_context(|| format!("Failed to read models dir: {}", models_dir.display()))?
{
let entry = entry?;
let path = entry.path();
// Only consider .bin files
let is_bin = path
.extension()
.and_then(|s| s.to_str())
@@ -404,7 +362,6 @@ pub fn find_model_file() -> Result<PathBuf> {
continue;
}
// Only consider regular files
let md = match std::fs::metadata(&path) {
Ok(m) if m.is_file() => m,
_ => continue,
@@ -414,7 +371,6 @@ pub fn find_model_file() -> Result<PathBuf> {
}
if candidates.is_empty() {
// 4) Fallback to known tiny English model if present
let fallback = models_dir.join("ggml-tiny.en.bin");
if fallback.is_file() {
return Ok(fallback);
@@ -423,7 +379,8 @@ pub fn find_model_file() -> Result<PathBuf> {
"No Whisper model files (*.bin) found in {}. \
Please download a model or set WHISPER_MODEL.",
models_dir.display()
));
)
.into());
}
candidates.sort_by_key(|(size, _)| *size);
@@ -431,19 +388,16 @@ pub fn find_model_file() -> Result<PathBuf> {
Ok(path)
}
/// Decode an audio file into PCM f32 samples using ffmpeg (ffmpeg executable required).
pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
let in_path = audio_path
.to_str()
.ok_or_else(|| anyhow!("Audio path must be valid UTF-8: {}", audio_path.display()))?;
// Use a raw f32le file to match the -f f32le output format.
let tmp_raw = std::env::temp_dir().join("polyscribe_tmp_input.f32le");
let tmp_raw_str = tmp_raw
.to_str()
.ok_or_else(|| anyhow!("Temp path not valid UTF-8: {}", tmp_raw.display()))?;
// ffmpeg -i input -f f32le -ac 1 -ar 16000 -y /tmp/tmp.f32le
let status = Command::new("ffmpeg")
.arg("-hide_banner")
.arg("-loglevel")
@@ -465,21 +419,17 @@ pub fn decode_audio_to_pcm_f32_ffmpeg(audio_path: &Path) -> Result<Vec<f32>> {
return Err(anyhow!(
"ffmpeg exited with non-zero status when decoding {}",
in_path
));
)
.into());
}
let raw = std::fs::read(&tmp_raw)
.with_context(|| format!("Failed to read temp PCM file: {}", tmp_raw.display()))?;
// Best-effort cleanup of the temp file
let _ = std::fs::remove_file(&tmp_raw);
// Interpret raw bytes as f32 little-endian
if raw.len() % 4 != 0 {
return Err(anyhow!(
"Decoded PCM file length not multiple of 4: {}",
raw.len()
));
return Err(anyhow!("Decoded PCM file length not multiple of 4: {}", raw.len()).into());
}
let mut samples = Vec::with_capacity(raw.len() / 4);
for chunk in raw.chunks_exact(4) {

File diff suppressed because it is too large Load Diff

View File

@@ -1,16 +1,7 @@
// rust
//! Commonly used exports for convenient glob-imports in binaries and tests.
//! Usage: `use polyscribe_core::prelude::*;`
pub use crate::backend::*;
pub use crate::config::*;
pub use crate::error::Error;
pub use crate::models::*;
// If you frequently use UI helpers across binaries/tests, export them too.
// Keep this lean to avoid pulling UI everywhere unintentionally.
#[allow(unused_imports)]
pub use crate::ui::*;
/// A convenient alias for `std::result::Result` with the error type defaulting to [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;

View File

@@ -1,79 +1,76 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
//! UI helpers powered by cliclack for interactive console experiences.
//! Centralizes prompts, logging, and progress primitives.
/// Progress indicators and reporting tools for displaying task completion.
pub mod progress;
use std::io;
use std::io::IsTerminal;
/// Log an informational message via `cliclack::log::info`; any logging error is ignored.
pub fn info(msg: impl AsRef<str>) {
    let _ = cliclack::log::info(msg.as_ref());
}
/// Log a warning message via `cliclack::log::warning`; any logging error is ignored.
pub fn warn(msg: impl AsRef<str>) {
    let _ = cliclack::log::warning(msg.as_ref());
}
/// Log an error message.
pub fn error(msg: impl AsRef<str>) {
let m = msg.as_ref();
let _ = cliclack::log::error(m);
}
/// Log a success message via `cliclack::log::success`; any logging error is ignored.
pub fn success(msg: impl AsRef<str>) {
    let _ = cliclack::log::success(msg.as_ref());
}
/// Show a note consisting of a prompt (title) and a message body via
/// `cliclack::note`; any output error is ignored.
pub fn note(prompt: impl AsRef<str>, message: impl AsRef<str>) {
    let title = prompt.as_ref();
    let body = message.as_ref();
    let _ = cliclack::note(title, body);
}
/// Print a short intro header via `cliclack::intro`; any output error is ignored.
pub fn intro(title: impl AsRef<str>) {
    let heading = title.as_ref();
    let _ = cliclack::intro(heading);
}
/// Print a short outro footer via `cliclack::outro`; any output error is ignored.
pub fn outro(msg: impl AsRef<str>) {
    let footer = msg.as_ref();
    let _ = cliclack::outro(footer);
}
/// Print a line that should appear above any progress indicators.
///
/// The line is emitted through `cliclack::log::info`; any output error is ignored.
pub fn println_above_bars(line: impl AsRef<str>) {
    let text = line.as_ref();
    let _ = cliclack::log::info(text);
}
/// Prompt for input on stdin using cliclack's input component.
/// Returns default if provided and user enters empty string.
/// In non-interactive workflows, callers should skip prompt based on their flags.
pub fn prompt_input(prompt: &str, default: Option<&str>) -> io::Result<String> {
if crate::is_no_interaction() || !crate::stdin_is_tty() {
return Ok(default.unwrap_or("").to_string());
}
let mut q = cliclack::input(prompt);
if let Some(def) = default { q = q.default_input(def); }
q.interact().map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))
if let Some(def) = default {
q = q.default_input(def);
}
q.interact().map_err(|e| io::Error::other(e.to_string()))
}
/// Present a single-choice selector and return the selected index.
pub fn prompt_select<'a>(prompt: &str, items: &[&'a str]) -> io::Result<usize> {
pub fn prompt_select(prompt: &str, items: &[&str]) -> io::Result<usize> {
if crate::is_no_interaction() || !crate::stdin_is_tty() {
return Err(io::Error::other("interactive prompt disabled"));
}
let mut sel = cliclack::select::<usize>(prompt);
for (idx, label) in items.iter().enumerate() {
sel = sel.item(idx, *label, "");
}
sel.interact()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))
sel.interact().map_err(|e| io::Error::other(e.to_string()))
}
/// Present a multi-choice selector and return indices of selected items.
pub fn prompt_multi_select<'a>(prompt: &str, items: &[&'a str], defaults: Option<&[bool]>) -> io::Result<Vec<usize>> {
pub fn prompt_multi_select(
prompt: &str,
items: &[&str],
defaults: Option<&[bool]>,
) -> io::Result<Vec<usize>> {
if crate::is_no_interaction() || !crate::stdin_is_tty() {
return Err(io::Error::other("interactive prompt disabled"));
}
let mut ms = cliclack::multiselect::<usize>(prompt);
for (idx, label) in items.iter().enumerate() {
ms = ms.item(idx, *label, "");
@@ -88,37 +85,130 @@ pub fn prompt_multi_select<'a>(prompt: &str, items: &[&'a str], defaults: Option
ms = ms.initial_values(selected);
}
}
ms.interact()
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))
ms.interact().map_err(|e| io::Error::other(e.to_string()))
}
/// Ask a yes/no question and return the answer.
///
/// When interaction is disabled or stdin is not a TTY, no prompt is shown and
/// `default` is returned. Otherwise the prompt is shown with `default` as the
/// preselected answer, so accepting it gives the same result as the
/// non-interactive path.
///
/// # Errors
/// Returns an `io::Error` when the interactive prompt fails.
pub fn prompt_confirm(prompt: &str, default: bool) -> io::Result<bool> {
    if crate::is_no_interaction() || !crate::stdin_is_tty() {
        return Ok(default);
    }
    cliclack::confirm(prompt)
        // Preselect the caller's default instead of the library's built-in one.
        .initial_value(default)
        .interact()
        .map_err(|e| io::Error::other(e.to_string()))
}
/// Prompt for a password and return what the user entered.
///
/// # Errors
/// Returns an `io::Error` when interaction is disabled or stdin is not a TTY
/// (there is no default to fall back to), or when the prompt itself fails.
pub fn prompt_password(prompt: &str) -> io::Result<String> {
    let interactive = !crate::is_no_interaction() && crate::stdin_is_tty();
    if !interactive {
        return Err(io::Error::other(
            "password prompt disabled in non-interactive mode",
        ));
    }
    cliclack::password(prompt)
        .interact()
        .map_err(|err| io::Error::other(err.to_string()))
}
/// Prompt for a line of input that must pass `validate`.
///
/// When interaction is disabled or stdin is not a TTY, `default` is returned
/// as-is (without running `validate`) if present; otherwise an error is
/// returned. In interactive mode `default`, when given, is offered as the
/// default input.
///
/// # Errors
/// Returns an `io::Error` when no prompt can be shown and there is no
/// default, or when the prompt itself fails.
pub fn prompt_input_validated<F>(
    prompt: &str,
    default: Option<&str>,
    validate: F,
) -> io::Result<String>
where
    F: Fn(&str) -> Result<(), String> + 'static,
{
    let interactive = !crate::is_no_interaction() && crate::stdin_is_tty();
    if !interactive {
        return default
            .map(str::to_string)
            .ok_or_else(|| io::Error::other("interactive prompt disabled"));
    }
    let base = cliclack::input(prompt);
    let seeded = match default {
        Some(def) => base.default_input(def),
        None => base,
    };
    seeded
        .validate(move |s: &String| validate(s))
        .interact()
        .map_err(|err| io::Error::other(err.to_string()))
}
/// A simple spinner wrapper built on top of `cliclack::spinner()`.
///
/// This wrapper provides a minimal API with start/stop/success/error methods
/// to standardize spinner usage across the project.
pub struct Spinner(cliclack::ProgressBar);
impl Spinner {
/// Creates and starts a new spinner with the provided status text.
pub fn start(text: impl AsRef<str>) -> Self {
let s = cliclack::spinner();
s.start(text.as_ref());
Self(s)
if crate::is_no_progress() || crate::is_no_interaction() || !std::io::stderr().is_terminal()
{
let _ = cliclack::log::info(text.as_ref());
let s = cliclack::spinner();
Self(s)
} else {
let s = cliclack::spinner();
s.start(text.as_ref());
Self(s)
}
}
/// Stops the spinner with a submitted/completed style and message.
pub fn stop(self, text: impl AsRef<str>) {
let s = self.0;
s.stop(text.as_ref());
if crate::is_no_progress() {
let _ = cliclack::log::info(text.as_ref());
} else {
s.stop(text.as_ref());
}
}
/// Marks the spinner as successfully finished (alias for `stop`).
pub fn success(self, text: impl AsRef<str>) {
let s = self.0;
// cliclack progress bar uses `stop` for successful completion styling
s.stop(text.as_ref());
if crate::is_no_progress() {
let _ = cliclack::log::success(text.as_ref());
} else {
s.stop(text.as_ref());
}
}
/// Marks the spinner as failed with an error style and message.
pub fn error(self, text: impl AsRef<str>) {
let s = self.0;
s.error(text.as_ref());
if crate::is_no_progress() {
let _ = cliclack::log::error(text.as_ref());
} else {
s.error(text.as_ref());
}
}
}
/// A byte-count progress bar that degrades to plain log lines.
///
/// Holds `Some(bar)` when a `cliclack` progress bar is active and `None` when
/// bars are disabled, in which case messages go through `cliclack::log`.
pub struct BytesProgress(Option<cliclack::ProgressBar>);

impl BytesProgress {
    /// Start tracking `total` units with `text` as the label, pre-advanced by `initial`.
    ///
    /// No bar is created (and `text` is logged instead) when progress or
    /// interaction is disabled, stderr is not a terminal, or `total` is zero.
    pub fn start(total: u64, text: &str, initial: u64) -> Self {
        let bars_disabled = crate::is_no_progress()
            || crate::is_no_interaction()
            || !std::io::stderr().is_terminal()
            || total == 0;
        if bars_disabled {
            let _ = cliclack::log::info(text);
            return Self(None);
        }
        let bar = cliclack::progress_bar(total);
        bar.start(text);
        if initial > 0 {
            bar.inc(initial);
        }
        Self(Some(bar))
    }

    /// Advance the bar by `delta`; does nothing when no bar is active.
    pub fn inc(&mut self, delta: u64) {
        match self.0.as_mut() {
            Some(bar) => {
                bar.inc(delta);
            }
            None => {}
        }
    }

    /// Finish with `text`: stops the bar if one is active, otherwise logs `text` as info.
    pub fn stop(mut self, text: &str) {
        match self.0.take() {
            Some(bar) => {
                bar.stop(text);
            }
            None => {
                let _ = cliclack::log::info(text);
            }
        }
    }

    /// Finish with a failure `text`: marks the bar as errored if one is active,
    /// otherwise logs `text` as an error.
    pub fn error(mut self, text: &str) {
        match self.0.take() {
            Some(bar) => {
                bar.error(text);
            }
            None => {
                let _ = cliclack::log::error(text);
            }
        }
    }
}

View File

@@ -1,104 +1,109 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 <COPYRIGHT HOLDER>. All rights reserved.
use std::io::IsTerminal as _;
/// Manages a set of per-file progress bars plus a top aggregate bar using cliclack.
pub struct ProgressManager {
pub struct FileProgress {
enabled: bool,
per: Vec<cliclack::ProgressBar>,
total: Option<cliclack::ProgressBar>,
file_bars: Vec<cliclack::ProgressBar>,
total_bar: Option<cliclack::ProgressBar>,
completed: usize,
total_len: usize,
total_file_count: usize,
}
impl ProgressManager {
/// Create a new manager with the given enabled flag.
impl FileProgress {
pub fn new(enabled: bool) -> Self {
Self { enabled, per: Vec::new(), total: None, completed: 0, total_len: 0 }
Self {
enabled,
file_bars: Vec::new(),
total_bar: None,
completed: 0,
total_file_count: 0,
}
}
/// Create a manager that enables bars when `n > 1`, stderr is a TTY, and not quiet.
pub fn default_for_files(n: usize) -> Self {
let enabled = n > 1 && std::io::stderr().is_terminal() && !crate::is_quiet() && !crate::is_no_progress();
pub fn default_for_files(file_count: usize) -> Self {
let enabled = file_count > 1
&& std::io::stderr().is_terminal()
&& !crate::is_quiet()
&& !crate::is_no_progress();
Self::new(enabled)
}
/// Initialize bars for the given file labels. If disabled or single file, no-op.
pub fn init_files(&mut self, labels: &[String]) {
self.total_len = labels.len();
self.total_file_count = labels.len();
if !self.enabled || labels.len() <= 1 {
// No bars in single-file mode or when disabled
self.enabled = false;
return;
}
// Aggregate bar at the top
let mut total = cliclack::progress_bar(labels.len() as u64);
let total = cliclack::progress_bar(labels.len() as u64);
total.start("Total");
self.total = Some(total);
// Per-file bars (100% scale for each)
self.total_bar = Some(total);
for label in labels {
let mut pb = cliclack::progress_bar(100);
let pb = cliclack::progress_bar(100);
pb.start(label);
self.per.push(pb);
self.file_bars.push(pb);
}
}
/// Returns true when bars are enabled (multi-file TTY mode).
pub fn is_enabled(&self) -> bool { self.enabled }
pub fn is_enabled(&self) -> bool {
self.enabled
}
/// Update a per-file bar message.
pub fn set_per_message(&mut self, idx: usize, message: &str) {
if !self.enabled { return; }
if let Some(pb) = self.per.get_mut(idx) {
pub fn set_file_message(&mut self, idx: usize, message: &str) {
if !self.enabled {
return;
}
if let Some(pb) = self.file_bars.get_mut(idx) {
pb.set_message(message);
}
}
/// Update a per-file bar percent (0..=100).
pub fn set_per_percent(&mut self, idx: usize, percent: u64) {
if !self.enabled { return; }
if let Some(pb) = self.per.get_mut(idx) {
pub fn set_file_percent(&mut self, idx: usize, percent: u64) {
if !self.enabled {
return;
}
if let Some(pb) = self.file_bars.get_mut(idx) {
let p = percent.min(100);
pb.set_message(&format!("{p}%"));
pb.set_message(format!("{p}%"));
}
}
/// Mark a file as finished (set to 100% and update total counter).
pub fn mark_file_done(&mut self, idx: usize) {
if !self.enabled { return; }
if let Some(pb) = self.per.get_mut(idx) {
if !self.enabled {
return;
}
if let Some(pb) = self.file_bars.get_mut(idx) {
pb.stop("done");
}
self.completed += 1;
if let Some(total) = &mut self.total {
if let Some(total) = &mut self.total_bar {
total.inc(1);
if self.completed >= self.total_len {
if self.completed >= self.total_file_count {
total.stop("all done");
}
}
}
/// Finish the aggregate bar with a custom message.
pub fn finish_total(&mut self, message: &str) {
if !self.enabled { return; }
if let Some(total) = &mut self.total {
if !self.enabled {
return;
}
if let Some(total) = &mut self.total_bar {
total.stop(message);
}
}
}
/// A simple reporter for displaying progress messages using cliclack logging.
#[derive(Debug)]
pub struct ProgressReporter {
non_interactive: bool,
}
impl ProgressReporter {
/// Creates a new progress reporter.
pub fn new(non_interactive: bool) -> Self { Self { non_interactive } }
pub fn new(non_interactive: bool) -> Self {
Self { non_interactive }
}
/// Displays a progress step message.
pub fn step(&mut self, message: &str) {
if self.non_interactive {
let _ = cliclack::log::info(format!("[..] {message}"));
@@ -107,7 +112,6 @@ impl ProgressReporter {
}
}
/// Displays a completion message.
pub fn finish_with_message(&mut self, message: &str) {
if self.non_interactive {
let _ = cliclack::log::info(format!("[ok] {message}"));

View File

@@ -9,3 +9,4 @@ serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.142"
tokio = { version = "1.47.1", features = ["rt-multi-thread", "process", "io-util"] }
which = "6.0.3"
directories = { workspace = true }

View File

@@ -1,8 +1,7 @@
use anyhow::{Context, Result};
use serde::Deserialize;
use std::process::Stdio;
use std::{
env,
fs,
env, fs,
os::unix::fs::PermissionsExt,
path::Path,
};
@@ -10,7 +9,6 @@ use tokio::{
io::{AsyncBufReadExt, BufReader},
process::{Child as TokioChild, Command},
};
use std::process::Stdio;
#[derive(Debug, Clone)]
pub struct PluginInfo {
@@ -25,27 +23,19 @@ impl PluginManager {
pub fn list(&self) -> Result<Vec<PluginInfo>> {
let mut plugins = Vec::new();
// Scan PATH entries for executables starting with "polyscribe-plugin-"
if let Ok(path) = env::var("PATH") {
for dir in env::split_paths(&path) {
if let Ok(read_dir) = fs::read_dir(&dir) {
for entry in read_dir.flatten() {
let path = entry.path();
if let Some(fname) = path.file_name().and_then(|s| s.to_str()) {
if fname.starts_with("polyscribe-plugin-") && is_executable(&path) {
let name = fname.trim_start_matches("polyscribe-plugin-").to_string();
plugins.push(PluginInfo {
name,
path: path.to_string_lossy().to_string(),
});
}
}
}
}
scan_dir_for_plugins(&dir, &mut plugins);
}
}
// TODO: also scan XDG data plugins dir for symlinks/binaries
if let Some(dirs) = directories::ProjectDirs::from("dev", "polyscribe", "polyscribe") {
let plugin_dir = dirs.data_dir().join("plugins");
scan_dir_for_plugins(&plugin_dir, &mut plugins);
}
plugins.sort_by(|a, b| a.path.cmp(&b.path));
plugins.dedup_by(|a, b| a.path == b.path);
Ok(plugins)
}
@@ -89,7 +79,8 @@ impl PluginManager {
fn resolve(&self, name: &str) -> Result<String> {
let bin = format!("polyscribe-plugin-{name}");
let path = which::which(&bin).with_context(|| format!("plugin not found in PATH: {bin}"))?;
let path =
which::which(&bin).with_context(|| format!("plugin not found in PATH: {bin}"))?;
Ok(path.to_string_lossy().to_string())
}
}
@@ -102,17 +93,27 @@ fn is_executable(path: &Path) -> bool {
{
if let Ok(meta) = fs::metadata(path) {
let mode = meta.permissions().mode();
// if any execute bit is set
return mode & 0o111 != 0;
}
}
// Fallback for non-unix (treat files as candidates)
true
}
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
struct Capability {
command: String,
summary: String,
/// Appends every plugin executable found directly inside `dir` to `out`.
///
/// An entry counts as a plugin when its file name is valid UTF-8, starts with
/// `polyscribe-plugin-`, and `is_executable` accepts it. The plugin name is the
/// file name with that prefix removed exactly once. Directories that cannot be
/// read and entries that fail to load are skipped without reporting an error.
fn scan_dir_for_plugins(dir: &Path, out: &mut Vec<PluginInfo>) {
    let Ok(read_dir) = fs::read_dir(dir) else {
        return;
    };
    for entry in read_dir.flatten() {
        let path = entry.path();
        // `strip_prefix` removes the prefix once; `trim_start_matches` would also
        // eat a repeated prefix and yield a name that maps to a different binary.
        if let Some(fname) = path.file_name().and_then(|s| s.to_str())
            && let Some(name) = fname.strip_prefix("polyscribe-plugin-")
            && is_executable(&path)
        {
            out.push(PluginInfo {
                name: name.to_string(),
                path: path.to_string_lossy().to_string(),
            });
        }
    }
}

View File

@@ -32,18 +32,20 @@ Run locally
Models during development
- Interactive downloader:
- cargo run -- --download-models
- cargo run -- models download
- Non-interactive update (checks sizes/hashes, downloads if missing):
- cargo run -- --update-models --no-interaction -q
- cargo run -- models update --no-interaction -q
Tests
- Run all tests:
- cargo test
- The test suite includes CLI-oriented integration tests and unit tests. Some tests simulate GPU detection using env vars (POLYSCRIBE_TEST_FORCE_*). Do not rely on these flags in production code.
Clippy
Clippy & formatting
- Run lint checks and treat warnings as errors:
- cargo clippy --all-targets -- -D warnings
- Check formatting:
- cargo fmt --all -- --check
- Common warnings can often be fixed by simplifying code, removing unused imports, and following idiomatic patterns.
Code layout
@@ -61,10 +63,10 @@ Adding a feature
Running the model downloader
- Interactive:
- cargo run -- --download-models
- cargo run -- models download
- Non-interactive suggestions for CI:
- POLYSCRIBE_MODELS_DIR=$PWD/models \
cargo run -- --update-models --no-interaction -q
cargo run -- models update --no-interaction -q
Env var examples for local testing
- Use a local copy of models and a specific model file:

View File

@@ -30,10 +30,10 @@ CLI reference
- Choose runtime backend. Default is auto (prefers CUDA → HIP → Vulkan → CPU), depending on detection.
- --gpu-layers N
- Number of layers to offload to the GPU when supported.
- --download-models
- models download
- Launch interactive model downloader (lists Hugging Face models; multi-select to download).
- Controls: Use Up/Down to navigate, Space to toggle selections, and Enter to confirm. Models are grouped by base (e.g., tiny, base, small).
- --update-models
- models update
- Verify/update local models by comparing sizes and hashes with the upstream manifest.
- -v, --verbose (repeatable)
- Increase log verbosity; use -vv for very detailed logs.
@@ -42,6 +42,9 @@ CLI reference
- --no-interaction
- Disable all interactive prompts (for CI). Combine with env vars to control behavior.
- Subcommands:
- models download: Launch interactive model downloader.
- models update: Verify/update local models (non-interactive).
- plugins list|info|run: Discover and run plugins.
- completions <shell>: Write shell completion script to stdout.
- man: Write a man page to stdout.

View File

@@ -1,5 +1,4 @@
// SPDX-License-Identifier: MIT
// Stub plugin: tubescribe
use anyhow::{Context, Result};
use clap::Parser;
@@ -36,7 +35,6 @@ fn main() -> Result<()> {
serve_once()?;
return Ok(());
}
// Default: show capabilities (friendly behavior if run without flags)
let caps = psp::Capabilities {
name: "tubescribe".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
@@ -49,14 +47,12 @@ fn main() -> Result<()> {
}
fn serve_once() -> Result<()> {
// Read exactly one line (one request)
let stdin = std::io::stdin();
let mut reader = BufReader::new(stdin.lock());
let mut line = String::new();
reader.read_line(&mut line).context("failed to read request line")?;
let req: psp::JsonRpcRequest = serde_json::from_str(line.trim()).context("invalid JSON-RPC request")?;
// Simulate doing some work with progress
emit(&psp::StreamItem::progress(5, Some("start".into()), Some("initializing".into())))?;
std::thread::sleep(std::time::Duration::from_millis(50));
emit(&psp::StreamItem::progress(25, Some("probe".into()), Some("probing sources".into())))?;
@@ -65,7 +61,6 @@ fn serve_once() -> Result<()> {
std::thread::sleep(std::time::Duration::from_millis(50));
emit(&psp::StreamItem::progress(90, Some("finalize".into()), Some("finalizing".into())))?;
// Handle method and produce result
let result = match req.method.as_str() {
"generate_metadata" => {
let title = "Canned title";
@@ -78,7 +73,6 @@ fn serve_once() -> Result<()> {
})
}
other => {
// Unknown method
let err = psp::StreamItem::err(req.id.clone(), -32601, format!("Method not found: {}", other), None);
emit(&err)?;
return Ok(());