Initial project documentation for workflow-miner — a Rust CLI + zsh plugin that mines recurring command workflows from Atuin shell history.
368 lines
13 KiB
Markdown
368 lines
13 KiB
Markdown
# Architecture
|
|
|
|
## Overview
|
|
|
|
A Rust CLI + zsh plugin that reads Atuin's shell history SQLite database, mines recurring multi-step command workflows using sequential pattern mining, and surfaces them as reusable recipes.
|
|
|
|
## System Diagram
|
|
|
|
```
|
|
┌─────────────────────────────────────────────────────┐
|
|
│ CLI binary (wfm) │
|
|
├──────────┬──────────┬───────────┬───────────────────┤
|
|
│ ingest │ mine │ store │ output │
|
|
│ │ │ │ │
|
|
│ read │ prefix │ own │ ┌─ tui dashboard │
|
|
│ atuin │ span │ sqlite │ ├─ zsh widget │
|
|
│ sqlite │ + BIDE │ for │ ├─ justfile gen │
|
|
│ (r/o) │ │ patterns │ ├─ json/yaml │
|
|
│ │ cluster │ │ └─ llm describe │
|
|
│ abstract│ │ │ (optional) │
|
|
│ commands│ │ │ │
|
|
└──────────┴──────────┴───────────┴───────────────────┘
|
|
▲ │
|
|
│ ▼
|
|
~/.local/share/atuin/history.db ~/.config/wfm/
|
|
~/.local/share/wfm/
|
|
workflows.db
|
|
justfile (gen)
|
|
```
|
|
|
|
## Crate Structure (Cargo workspace)
|
|
|
|
```
|
|
wfm/
|
|
├── Cargo.toml # workspace root
|
|
├── crates/
|
|
│ ├── wfm-core/ # domain logic, no I/O
|
|
│ │ ├── src/
|
|
│ │ │ ├── lib.rs
|
|
│ │ │ ├── abstraction.rs # command normalization
|
|
│ │ │ ├── prefixspan.rs # PrefixSpan algorithm
|
|
│ │ │ ├── clustering.rs # similar pattern dedup
|
|
│ │ │ ├── context.rs # project/cwd/git inference
|
|
│ │ │ └── workflow.rs # Workflow struct + scoring
|
|
│ │ └── Cargo.toml
|
|
│ ├── wfm-store/ # persistence (read atuin, write own db)
|
|
│ │ ├── src/
|
|
│ │ │ ├── lib.rs
|
|
│ │ │ ├── atuin.rs # read atuin history.db
|
|
│ │ │ ├── workflows.rs # read/write workflows.db
|
|
│ │ │ └── models.rs # shared DB models
|
|
│ │ └── Cargo.toml
|
|
│ ├── wfm-cli/ # CLI binary
|
|
│ │ ├── src/
|
|
│ │ │ ├── main.rs
|
|
│ │ │ ├── commands/
|
|
│ │ │ │ ├── mod.rs
|
|
│ │ │ │ ├── scan.rs # mine workflows from history
|
|
│ │ │ │ ├── list.rs # list discovered workflows
|
|
│ │ │ │ ├── show.rs # show workflow detail
|
|
│ │ │ │ ├── export.rs # justfile / json / yaml export
|
|
│ │ │ │ ├── suggest.rs # suggest next command (for zsh)
|
|
│ │ │ │ ├── forget.rs # dismiss/hide a workflow
|
|
│ │ │ │ └── describe.rs # LLM naming (optional)
|
|
│ │ │ └── tui/
|
|
│ │ │ ├── mod.rs
|
|
│ │ │ ├── app.rs # ratatui app state
|
|
│ │ │ ├── dashboard.rs # main view
|
|
│ │ │ ├── detail.rs # workflow detail view
|
|
│ │ │ └── export.rs # export dialog
|
|
│ │ └── Cargo.toml
|
|
│ └── wfm-zsh/ # zsh plugin (shell scripts, not Rust)
|
|
│ ├── wfm.plugin.zsh # main plugin entry
|
|
│ ├── functions/
|
|
│ │ └── _wfm # completions
|
|
│ └── README.md
|
|
└── README.md
|
|
```
|
|
|
|
## Core Data Flow
|
|
|
|
### Phase 1: Ingest
|
|
|
|
```
|
|
atuin history.db
|
|
│
|
|
▼
|
|
SELECT command, timestamp, cwd, session, duration, exit
|
|
FROM history WHERE deleted_at IS NULL
|
|
ORDER BY session, timestamp
|
|
│
|
|
▼
|
|
Group by session → Vec<Session { commands: Vec<Entry> }>
|
|
│
|
|
▼
|
|
Abstract each command:
|
|
"git commit -m 'fix typo'" → ["git", "commit", "-m", "<arg>"]
|
|
"docker compose up -d" → ["docker", "compose", "up", "-d"]
|
|
"cargo test -- --nocapture"→ ["cargo", "test", "--", "<arg>"]
|
|
│
|
|
▼
|
|
Optionally enrich with context:
|
|
- infer project root from cwd (walk up for .git, Cargo.toml, package.json)
|
|
- record git branch if .git exists (shell out to git rev-parse)
|
|
```
|
|
|
|
### Phase 2: Mine
|
|
|
|
```
|
|
Abstracted session sequences
|
|
│
|
|
▼
|
|
PrefixSpan(min_support=2, max_gap=5, min_length=2, max_length=20)
|
|
│
|
|
▼
|
|
Frequent sequential patterns with support counts
|
|
│
|
|
▼
|
|
BIDE filter → closed patterns only (remove redundant sub-patterns)
|
|
│
|
|
▼
|
|
Cluster similar patterns (edit distance on abstract command sequences)
|
|
│
|
|
▼
|
|
Score patterns:
|
|
- frequency (how often)
|
|
- recency (when last seen)
|
|
- consistency (same order every time? or variations?)
|
|
- context specificity (always in same cwd/project? or global?)
|
|
│
|
|
▼
|
|
Ranked list of Workflow candidates
|
|
```
|
|
|
|
### Phase 3: Store
|
|
|
|
Own SQLite database at `~/.local/share/wfm/workflows.db`:
|
|
|
|
```sql
|
|
CREATE TABLE workflows (
|
|
id INTEGER PRIMARY KEY,
|
|
name TEXT, -- auto-generated or LLM-named
|
|
description TEXT, -- optional, LLM-generated
|
|
commands TEXT NOT NULL, -- JSON array of abstracted commands
|
|
raw_example TEXT NOT NULL, -- JSON: one concrete instance
|
|
support INTEGER NOT NULL, -- how many sessions contain this
|
|
first_seen INTEGER NOT NULL, -- timestamp
|
|
last_seen INTEGER NOT NULL, -- timestamp
|
|
avg_duration INTEGER, -- average end-to-end workflow duration across instances
|
|
contexts TEXT, -- JSON array of {cwd, project, branch}
|
|
score REAL NOT NULL, -- composite ranking score
|
|
status TEXT DEFAULT 'active', -- active | dismissed | exported
|
|
created_at INTEGER NOT NULL,
|
|
updated_at INTEGER NOT NULL
|
|
);
|
|
|
|
CREATE TABLE workflow_instances (
|
|
id INTEGER PRIMARY KEY,
|
|
workflow_id INTEGER REFERENCES workflows(id),
|
|
session TEXT NOT NULL,
|
|
timestamp INTEGER NOT NULL,
|
|
commands TEXT NOT NULL, -- JSON: concrete commands for this instance
|
|
cwd TEXT,
|
|
duration INTEGER
|
|
);
|
|
|
|
CREATE INDEX idx_workflows_score ON workflows(score DESC);
|
|
CREATE INDEX idx_workflows_status ON workflows(status);
|
|
CREATE INDEX idx_instances_workflow ON workflow_instances(workflow_id);
|
|
```
|
|
|
|
### Phase 4: Output
|
|
|
|
#### CLI commands
|
|
|
|
```
|
|
wfm scan # mine workflows from atuin history
|
|
wfm scan --since 30d # only recent history
|
|
wfm scan --project . # only current project context
|
|
wfm list # list discovered workflows, ranked
|
|
wfm list --context . # workflows relevant to current dir
|
|
wfm show <id> # detail view of a workflow
|
|
wfm export justfile # generate Justfile from top workflows
|
|
wfm export json # JSON catalog
|
|
wfm export yaml # YAML catalog
|
|
wfm suggest # what should I do next? (context-aware)
|
|
wfm forget <id> # dismiss a workflow
|
|
wfm describe <id> # name/describe with LLM (optional)
|
|
wfm describe --all # batch LLM naming
|
|
wfm tui # interactive dashboard
|
|
```
|
|
|
|
## Command Abstraction Strategy
|
|
|
|
The abstractor converts concrete commands into mineable tokens:
|
|
|
|
```
|
|
Level 0 (raw): git commit -m "fix: handle null response"
|
|
Level 1 (words): ["git", "commit", "-m", "fix: handle null response"]
|
|
Level 2 (abstract):["git", "commit", "-m", "<arg>"]
|
|
Level 3 (verb): ["git", "commit"]
|
|
```
|
|
|
|
Mining uses Level 2 by default (preserves flags, abstracts values). Level 3 is a fallback for very noisy histories.
|
|
|
|
Special handling:
|
|
- Pipelines: split on `|` — abstract each segment independently
|
|
- Compound: split on `&&`, `||`, `;` — treat as sub-sequence
|
|
- Redirections and process substitution: strip `>`, `>>`, `2>&1`, `<(...)` before pattern matching
|
|
- Common tools with known flag semantics: git, docker, kubectl, cargo, npm — maintain a small registry of "flags that take values" for better abstraction
|
|
|
|
## Scoring Formula
|
|
|
|
```
|
|
score = w_freq * log(support)
|
|
+ w_recency * decay(days_since_last_seen)
|
|
+ w_consistency * (1 - variance_in_ordering)
|
|
+ w_specificity * context_specificity_ratio
|
|
```
|
|
|
|
Default weights: `w_freq=0.4, w_recency=0.3, w_consistency=0.2, w_specificity=0.1`
|
|
|
|
## TUI Dashboard (ratatui)
|
|
|
|
```
|
|
┌─ Workflow Miner ─────────────────────────────────────┐
|
|
│ [Workflows] [Contexts] [Export] │
|
|
├──────────────────────────────────────────────────────┤
|
|
│ # │ Name │ Steps │ Freq │ Last Seen │
|
|
│ 1 │ deploy-staging │ 5 │ 23 │ 2h ago │
|
|
│ 2 │ test-and-commit │ 3 │ 87 │ 15m ago │
|
|
│ 3 │ docker-rebuild │ 4 │ 12 │ 1d ago │
|
|
│ 4 │ debug-api-logs │ 6 │ 8 │ 3d ago │
|
|
│ ►5 │ db-migration │ 4 │ 5 │ 1w ago │
|
|
├──────────────────────────────────────────────────────┤
|
|
│ db-migration (5 occurrences, avg 45s) │
|
|
│ │
|
|
│ 1. cargo sqlx prepare │
|
|
│ 2. cargo sqlx migrate run │
|
|
│ 3. cargo test --test db_tests │
|
|
│ 4. git add migrations/ │
|
|
│ │
|
|
│ Context: ~/projects/myapp (branch: main, feature/*) │
|
|
│ │
|
|
│ [e]xport [d]escribe [f]orget [Enter] instances │
|
|
└──────────────────────────────────────────────────────┘
|
|
```
|
|
|
|
## zsh Plugin
|
|
|
|
### Suggestion modes (configurable)
|
|
|
|
- **keybinding** (default): User presses `Ctrl+Space` to request a suggestion. Zero overhead.
|
|
- **ambient**: `precmd` hook calls `wfm suggest` after every command. Must stay under 50ms.
|
|
- **both**: Ambient display + keybinding to accept.
|
|
|
|
### Widget sketch
|
|
|
|
```zsh
|
|
_wfm_suggest() {
|
|
local suggestion
|
|
suggestion=$("${WFM_BIN:-wfm}" suggest \
|
|
--cwd "$PWD" \
|
|
--last-cmd "$_wfm_last_cmd" \
|
|
--session "$_wfm_session" \
|
|
--format oneline 2>/dev/null)
|
|
|
|
if [[ -n "$suggestion" ]]; then
|
|
_wfm_suggestion="$suggestion"
|
|
zle reset-prompt
|
|
fi
|
|
}
|
|
|
|
_wfm_accept() {
|
|
if [[ -n "$_wfm_suggestion" ]]; then
|
|
BUFFER="$_wfm_suggestion"
|
|
CURSOR=${#BUFFER}
|
|
_wfm_suggestion=""
|
|
zle reset-prompt
|
|
fi
|
|
}
|
|
|
|
zle -N _wfm_suggest
|
|
zle -N _wfm_accept
|
|
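bindkey '^@' _wfm_accept # Ctrl+Space
# NOTE(review): _wfm_suggest is registered as a widget but never bound to a key here, while "keybinding" mode describes Ctrl+Space as *requesting* a suggestion. Either bind _wfm_suggest to its own key, or have the Ctrl+Space widget fetch a suggestion when none is pending and accept it otherwise.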
|
|
```
|
|
|
|
## Configuration
|
|
|
|
`~/.config/wfm/config.toml`:
|
|
|
|
```toml
|
|
[source]
|
|
atuin_db = "~/.local/share/atuin/history.db" # auto-detected
|
|
|
|
[mining]
|
|
min_support = 2 # minimum number of sessions that must contain a pattern for it to count as a workflow
|
|
max_gap = 5 # max intervening commands between steps
|
|
min_length = 2 # minimum workflow steps
|
|
max_length = 20 # maximum workflow steps
|
|
scan_window = "90d" # how far back to look by default
|
|
|
|
[scoring]
|
|
freq_weight = 0.4
|
|
recency_weight = 0.3
|
|
consistency_weight = 0.2
|
|
specificity_weight = 0.1
|
|
|
|
[suggest]
|
|
enabled = true
|
|
mode = "keybinding" # keybinding | ambient | both
|
|
min_score = 0.5 # don't suggest low-confidence workflows
|
|
show_in = "rprompt" # rprompt | inline | notification
|
|
|
|
[llm]
|
|
enabled = false # opt-in
|
|
command = "llm" # or "ollama run llama3.2"
|
|
model = "" # provider-specific model name
|
|
|
|
[export]
|
|
justfile_path = "./Justfile.wfm"
|
|
```
|
|
|
|
## Dependencies
|
|
|
|
```toml
|
|
[workspace.dependencies]
|
|
rusqlite = { version = "0.38", features = ["bundled"] }
|
|
shell-words = "1"
|
|
ratatui = "0.30"
|
|
crossterm = "0.28"
|
|
chrono = { version = "0.4", features = ["serde"] }
|
|
clap = { version = "4", features = ["derive"] }
|
|
serde = { version = "1", features = ["derive"] }
|
|
serde_json = "1"
|
|
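serde_yaml = "0.9" # NOTE(review): serde_yaml is deprecated/archived upstream — evaluate a maintained alternative before implementing YAML export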
|
|
anyhow = "1"
|
|
dirs = "6"
|
|
```
|
|
|
|
## Implementation Phases
|
|
|
|
### Phase 1: Foundation (MVP)
|
|
1. Cargo workspace setup
|
|
2. Atuin DB reader (rusqlite, read-only)
|
|
3. Command abstractor (shell-words + custom normalizer)
|
|
4. PrefixSpan implementation
|
|
5. Own SQLite storage for discovered workflows
|
|
6. `wfm scan` and `wfm list` commands
|
|
|
|
### Phase 2: Output
|
|
7. `wfm show <id>` with detailed view
|
|
8. `wfm export justfile` — generate Justfile recipes
|
|
9. `wfm export json` / `wfm export yaml`
|
|
10. Scoring and ranking
|
|
|
|
### Phase 3: Interactive
|
|
11. TUI dashboard (ratatui)
|
|
12. `wfm suggest` command
|
|
13. zsh plugin with precmd hook + suggestion widget
|
|
14. `wfm forget` for dismissing irrelevant patterns
|
|
|
|
### Phase 4: Enrichment
|
|
15. BIDE closed pattern filter (reduce noise)
|
|
16. Pattern clustering (edit distance dedup)
|
|
17. Project/git context inference
|
|
18. Optional LLM integration (`wfm describe`)
|