owlen/crates/owlen-core/tests/phase9_remoting.rs

//! Integration tests for Phase 9: Remoting / Cloud Hybrid Deployment
//!
//! Tests WebSocket transport, failover mechanisms, and health checking.

use owlen_core::mcp::failover::{FailoverConfig, FailoverMcpClient, ServerEntry, ServerHealth};
use owlen_core::mcp::{McpClient, McpToolCall, McpToolDescriptor};
use owlen_core::{Error, Result};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Duration;

/// Mock MCP client for testing failover behavior
struct MockMcpClient {
    name: String,
    fail_count: AtomicUsize,
    max_failures: usize,
}

impl MockMcpClient {
    fn new(name: &str, max_failures: usize) -> Self {
        Self {
            name: name.to_string(),
            fail_count: AtomicUsize::new(0),
            max_failures,
        }
    }

    fn always_healthy(name: &str) -> Self {
        Self::new(name, 0)
    }

    fn fail_n_times(name: &str, n: usize) -> Self {
        Self::new(name, n)
    }
}

#[async_trait::async_trait]
impl McpClient for MockMcpClient {
    async fn list_tools(&self) -> Result<Vec<McpToolDescriptor>> {
        let current = self.fail_count.fetch_add(1, Ordering::SeqCst);
        if current < self.max_failures {
            Err(Error::Network(format!(
                "Mock failure {} from '{}'",
                current + 1,
                self.name
            )))
        } else {
            Ok(vec![McpToolDescriptor {
                name: format!("test_tool_{}", self.name),
                description: format!("Tool from {}", self.name),
                input_schema: serde_json::json!({}),
                requires_network: false,
                requires_filesystem: vec![],
            }])
        }
    }

    async fn call_tool(&self, call: McpToolCall) -> Result<owlen_core::mcp::McpToolResponse> {
        let current = self.fail_count.load(Ordering::SeqCst);
        if current < self.max_failures {
            Err(Error::Network(format!("Mock failure from '{}'", self.name)))
        } else {
            Ok(owlen_core::mcp::McpToolResponse {
                name: call.name,
                success: true,
                output: serde_json::json!({ "server": self.name }),
                metadata: std::collections::HashMap::new(),
                duration_ms: 0,
            })
        }
    }
}

#[tokio::test]
async fn test_failover_basic_priority() {
    // Create two healthy servers with different priorities
    let primary = Arc::new(MockMcpClient::always_healthy("primary"));
    let backup = Arc::new(MockMcpClient::always_healthy("backup"));

    let servers = vec![
        ServerEntry::new("primary".to_string(), primary as Arc<dyn McpClient>, 1),
        ServerEntry::new("backup".to_string(), backup as Arc<dyn McpClient>, 2),
    ];

    let client = FailoverMcpClient::with_servers(servers);

    // Should use primary (lower priority number)
    let tools = client.list_tools().await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].name, "test_tool_primary");
}

#[tokio::test]
async fn test_failover_with_retry() {
    // Primary fails 2 times, then succeeds
    let primary = Arc::new(MockMcpClient::fail_n_times("primary", 2));
    let backup = Arc::new(MockMcpClient::always_healthy("backup"));

    let servers = vec![
        ServerEntry::new("primary".to_string(), primary as Arc<dyn McpClient>, 1),
        ServerEntry::new("backup".to_string(), backup as Arc<dyn McpClient>, 2),
    ];

    let config = FailoverConfig {
        max_retries: 3,
        base_retry_delay: Duration::from_millis(10),
        health_check_interval: Duration::from_secs(30),
        health_check_timeout: Duration::from_secs(5),
        circuit_breaker_threshold: 5,
    };

    let client = FailoverMcpClient::new(servers, config);

    // Should eventually succeed after retries
    let tools = client.list_tools().await.unwrap();
    assert_eq!(tools.len(), 1);
    // After 2 failures and 1 success, should get the tool
    assert!(tools[0].name.contains("test_tool"));
}

#[tokio::test]
async fn test_failover_to_backup() {
    // Primary always fails, backup always succeeds
    let primary = Arc::new(MockMcpClient::fail_n_times("primary", 999));
    let backup = Arc::new(MockMcpClient::always_healthy("backup"));

    let servers = vec![
        ServerEntry::new("primary".to_string(), primary as Arc<dyn McpClient>, 1),
        ServerEntry::new("backup".to_string(), backup as Arc<dyn McpClient>, 2),
    ];

    let config = FailoverConfig {
        max_retries: 5,
        base_retry_delay: Duration::from_millis(5),
        health_check_interval: Duration::from_secs(30),
        health_check_timeout: Duration::from_secs(5),
        circuit_breaker_threshold: 3,
    };

    let client = FailoverMcpClient::new(servers, config);

    // Should failover to backup after exhausting retries on primary
    let tools = client.list_tools().await.unwrap();
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].name, "test_tool_backup");
}

#[tokio::test]
async fn test_server_health_tracking() {
    let client = Arc::new(MockMcpClient::always_healthy("test"));
    let entry = ServerEntry::new("test".to_string(), client, 1);

    // Initial state should be healthy
    assert!(entry.is_available().await);
    assert_eq!(entry.get_health().await, ServerHealth::Healthy);

    // Mark as degraded
    entry.mark_degraded().await;
    assert!(!entry.is_available().await);
    match entry.get_health().await {
        ServerHealth::Degraded { .. } => {}
        _ => panic!("Expected Degraded state"),
    }

    // Mark as down
    entry.mark_down().await;
    assert!(!entry.is_available().await);
    match entry.get_health().await {
        ServerHealth::Down { .. } => {}
        _ => panic!("Expected Down state"),
    }

    // Recover to healthy
    entry.mark_healthy().await;
    assert!(entry.is_available().await);
    assert_eq!(entry.get_health().await, ServerHealth::Healthy);
}

#[tokio::test]
async fn test_health_check_all() {
    let healthy = Arc::new(MockMcpClient::always_healthy("healthy"));
    let unhealthy = Arc::new(MockMcpClient::fail_n_times("unhealthy", 999));

    let servers = vec![
        ServerEntry::new("healthy".to_string(), healthy as Arc<dyn McpClient>, 1),
        ServerEntry::new("unhealthy".to_string(), unhealthy as Arc<dyn McpClient>, 2),
    ];

    let client = FailoverMcpClient::with_servers(servers);

    // Run health check
    client.health_check_all().await;

    // Give spawned tasks time to complete
    tokio::time::sleep(Duration::from_millis(100)).await;

    // Check server status
    let status = client.get_server_status().await;
    assert_eq!(status.len(), 2);

    // Healthy server should be healthy
    let healthy_status = status.iter().find(|(name, _)| name == "healthy").unwrap();
    assert_eq!(healthy_status.1, ServerHealth::Healthy);

    // Unhealthy server should be down
    let unhealthy_status = status.iter().find(|(name, _)| name == "unhealthy").unwrap();
    match unhealthy_status.1 {
        ServerHealth::Down { .. } => {}
        _ => panic!("Expected unhealthy server to be Down"),
    }
}

#[tokio::test]
async fn test_call_tool_failover() {
    // Primary fails, backup succeeds
    let primary = Arc::new(MockMcpClient::fail_n_times("primary", 999));
    let backup = Arc::new(MockMcpClient::always_healthy("backup"));

    let servers = vec![
        ServerEntry::new("primary".to_string(), primary as Arc<dyn McpClient>, 1),
        ServerEntry::new("backup".to_string(), backup as Arc<dyn McpClient>, 2),
    ];

    let config = FailoverConfig {
        max_retries: 5,
        base_retry_delay: Duration::from_millis(5),
        ..Default::default()
    };

    let client = FailoverMcpClient::new(servers, config);

    // Call a tool - should failover to backup
    let call = McpToolCall {
        name: "test_tool".to_string(),
        arguments: serde_json::json!({}),
    };

    let response = client.call_tool(call).await.unwrap();
    assert!(response.success);
    assert_eq!(response.output["server"], "backup");
}

#[tokio::test]
async fn test_exponential_backoff() {
    // Test that retry delays increase exponentially
    let client = Arc::new(MockMcpClient::fail_n_times("test", 2));
    let entry = ServerEntry::new("test".to_string(), client, 1);

    let config = FailoverConfig {
        max_retries: 3,
        base_retry_delay: Duration::from_millis(10),
        ..Default::default()
    };

    let failover = FailoverMcpClient::new(vec![entry], config);

    let start = std::time::Instant::now();
    let _ = failover.list_tools().await;
    let elapsed = start.elapsed();

    // With base delay of 10ms and 2 retries:
    // Attempt 1: immediate
    // Attempt 2: 10ms delay (2^0 * 10)
    // Attempt 3: 20ms delay (2^1 * 10)
    // Total should be at least 30ms
    assert!(
        elapsed >= Duration::from_millis(30),
        "Expected at least 30ms, got {:?}",
        elapsed
    );
}

#[tokio::test]
async fn test_no_servers_configured() {
    let config = FailoverConfig::default();
    let client = FailoverMcpClient::new(vec![], config);

    let result = client.list_tools().await;
    assert!(result.is_err());
    match result {
        Err(Error::Network(msg)) => assert!(msg.contains("No servers configured")),
        _ => panic!("Expected Network error"),
    }
}

#[tokio::test]
async fn test_all_servers_fail() {
    // Both servers always fail
    let primary = Arc::new(MockMcpClient::fail_n_times("primary", 999));
    let backup = Arc::new(MockMcpClient::fail_n_times("backup", 999));

    let servers = vec![
        ServerEntry::new("primary".to_string(), primary as Arc<dyn McpClient>, 1),
        ServerEntry::new("backup".to_string(), backup as Arc<dyn McpClient>, 2),
    ];

    let config = FailoverConfig {
        max_retries: 2,
        base_retry_delay: Duration::from_millis(5),
        ..Default::default()
    };

    let client = FailoverMcpClient::new(servers, config);

    let result = client.list_tools().await;
    assert!(result.is_err());
    match result {
        Err(Error::Network(_)) => {} // Expected
        _ => panic!("Expected Network error"),
    }
}