// perspt_agent/test_runner.rs

1//! Verification Runners
2//!
3//! Provides test, syntax-check, build, and lint execution for language plugins.
4//!
5//! - `PythonTestRunner`: pytest-specific runner with detailed output parsing.
6//! - `RustTestRunner`: cargo-based runner with test output parsing.
7//! - `PluginVerifierRunner` (PSP-5 Phase 4): generic runner driven entirely by
8//!   a plugin's `VerifierProfile`. It executes whatever commands the profile
9//!   declares, including fallback commands, without hardcoding language details.
10//!
11//! The `TestRunnerTrait` is the unified async interface consumed by the orchestrator.
12
13use anyhow::{Context, Result};
14use std::path::{Path, PathBuf};
15use std::process::Stdio;
16use tokio::process::Command;
17
18use crate::types::{BehavioralContract, Criticality};
19use perspt_core::plugin::{VerifierProfile, VerifierStage};
20
21/// Result of a test run
/// Result of a single verification run (tests, syntax check, build, or lint).
///
/// Counts are parsed from tool output; `failures` carries per-test detail
/// when the parser can extract it. Coarse stages (build/lint) report a
/// single synthetic pass/fail entry.
#[derive(Debug, Clone, Default)]
pub struct TestResults {
    /// Number of passed tests
    pub passed: usize,
    /// Number of failed tests
    pub failed: usize,
    /// Number of skipped tests
    pub skipped: usize,
    /// Total tests run (normally passed + failed + skipped)
    pub total: usize,
    /// Detailed failure information
    pub failures: Vec<TestFailure>,
    /// Duration in milliseconds
    pub duration_ms: u64,
    /// Raw output (combined stdout/stderr in most runners)
    pub output: String,
    /// Whether the test run was successful (no infrastructure errors)
    pub run_succeeded: bool,
}
41
42impl TestResults {
43    /// Check if all tests passed
44    pub fn all_passed(&self) -> bool {
45        self.run_succeeded && self.failed == 0
46    }
47
48    /// Get pass rate as percentage
49    pub fn pass_rate(&self) -> f32 {
50        if self.total == 0 {
51            1.0
52        } else {
53            (self.passed as f32) / (self.total as f32)
54        }
55    }
56}
57
/// Information about a single test failure
#[derive(Debug, Clone)]
pub struct TestFailure {
    /// Test name (e.g., "test_divide_by_zero")
    pub name: String,
    /// Test file path, when the output parser could extract one
    pub file: Option<String>,
    /// Line number where failure occurred (not populated by the current parsers)
    pub line: Option<u32>,
    /// Error message
    pub message: String,
    /// Criticality (from weighted tests if matched; defaults to High)
    pub criticality: Criticality,
}
72
73fn force_failure_on_nonzero_exit(
74    results: &mut TestResults,
75    command_name: &str,
76    exit_code: Option<i32>,
77    output: &str,
78) {
79    if results.failed == 0 {
80        results.failed = 1;
81    }
82    if results.total == 0 {
83        results.total = results.passed + results.failed + results.skipped;
84    }
85    if results.failures.is_empty() {
86        results.failures.push(TestFailure {
87            name: command_name.to_string(),
88            file: None,
89            line: None,
90            message: format!(
91                "{} exited with code {:?} without a parseable success summary. Output:\n{}",
92                command_name, exit_code, output
93            ),
94            criticality: Criticality::High,
95        });
96    }
97}
98
/// Python test runner using uv and pytest
///
/// Handles:
/// 1. Checking for pyproject.toml
/// 2. Setting up Python environment via uv
/// 3. Running pytest
/// 4. Parsing results for V_log calculation
pub struct PythonTestRunner {
    /// Working directory (workspace root)
    working_dir: PathBuf,
    /// Timeout in seconds for the pytest invocation
    timeout_secs: u64,
    /// Whether to auto-setup if no pyproject.toml
    auto_setup: bool,
}
114
115impl PythonTestRunner {
116    /// Create a new Python test runner
117    pub fn new(working_dir: PathBuf) -> Self {
118        Self {
119            working_dir,
120            timeout_secs: 300, // 5 minute default timeout
121            auto_setup: true,
122        }
123    }
124
125    /// Set timeout
126    pub fn with_timeout(mut self, secs: u64) -> Self {
127        self.timeout_secs = secs;
128        self
129    }
130
131    /// Disable auto-setup (don't create pyproject.toml if missing)
132    pub fn without_auto_setup(mut self) -> Self {
133        self.auto_setup = false;
134        self
135    }
136
137    /// Check if workspace has a Python project setup
138    pub fn has_pyproject(&self) -> bool {
139        self.working_dir.join("pyproject.toml").exists()
140    }
141
142    /// Check if workspace has pytest configured
143    pub async fn has_pytest(&self) -> bool {
144        // Check if pytest is in pyproject.toml or can be run
145        let result = Command::new("uv")
146            .args(["run", "pytest", "--version"])
147            .current_dir(&self.working_dir)
148            .env_remove("VIRTUAL_ENV")
149            .stdout(Stdio::null())
150            .stderr(Stdio::null())
151            .status()
152            .await;
153
154        result.map(|s| s.success()).unwrap_or(false)
155    }
156
157    /// Initialize the Python environment with uv
158    /// NOTE: This assumes pyproject.toml already exists (created by orchestrator's step_init_project)
159    pub async fn setup_environment(&self) -> Result<()> {
160        log::info!("Setting up Python environment with uv");
161
162        // Check if pyproject.toml exists; if not, warn and try to proceed
163        if !self.has_pyproject() {
164            if self.auto_setup {
165                log::warn!(
166                    "No pyproject.toml found. Project should be initialized via 'uv init' first."
167                );
168                log::info!("Attempting to run 'uv init --lib' as fallback...");
169                let init_output = Command::new("uv")
170                    .args(["init", "--lib"])
171                    .current_dir(&self.working_dir)
172                    .env_remove("VIRTUAL_ENV")
173                    .stdout(Stdio::piped())
174                    .stderr(Stdio::piped())
175                    .output()
176                    .await
177                    .context("Failed to run uv init")?;
178
179                if !init_output.status.success() {
180                    let stderr = String::from_utf8_lossy(&init_output.stderr);
181                    log::warn!("uv init failed: {}", stderr);
182                    return self.install_pytest_directly().await;
183                }
184            } else {
185                anyhow::bail!(
186                    "No pyproject.toml found and auto_setup is disabled. Run 'uv init' first."
187                );
188            }
189        }
190
191        // Sync dependencies (this creates venv and installs deps)
192        let output = Command::new("uv")
193            .args(["sync", "--dev"])
194            .current_dir(&self.working_dir)
195            .env_remove("VIRTUAL_ENV")
196            .stdout(Stdio::piped())
197            .stderr(Stdio::piped())
198            .output()
199            .await
200            .context("Failed to run uv sync")?;
201
202        if !output.status.success() {
203            let stderr = String::from_utf8_lossy(&output.stderr);
204            log::warn!("uv sync failed: {}", stderr);
205            // Try just installing pytest directly
206            return self.install_pytest_directly().await;
207        }
208
209        // Ensure pytest is available as a dev dependency.
210        // `uv sync --dev` only installs what's already in pyproject.toml;
211        // for freshly-generated projects pytest may not be declared yet.
212        if !self.has_pytest().await {
213            log::info!("pytest not available after sync — adding as dev dependency");
214            let add_output = Command::new("uv")
215                .args(["add", "--dev", "pytest"])
216                .current_dir(&self.working_dir)
217                .env_remove("VIRTUAL_ENV")
218                .stdout(Stdio::piped())
219                .stderr(Stdio::piped())
220                .output()
221                .await;
222            match add_output {
223                Ok(o) if o.status.success() => {
224                    log::info!("Added pytest as dev dependency");
225                }
226                Ok(o) => {
227                    let stderr = String::from_utf8_lossy(&o.stderr);
228                    log::warn!("uv add --dev pytest failed: {}", stderr);
229                    // Last resort: install directly
230                    return self.install_pytest_directly().await;
231                }
232                Err(e) => {
233                    log::warn!("Failed to run uv add --dev pytest: {}", e);
234                    return self.install_pytest_directly().await;
235                }
236            }
237        }
238
239        log::info!("Python environment ready");
240        Ok(())
241    }
242
243    /// Install pytest directly without a full project setup
244    async fn install_pytest_directly(&self) -> Result<()> {
245        log::info!("Installing pytest via uv pip");
246
247        let output = Command::new("uv")
248            .args(["pip", "install", "pytest"])
249            .current_dir(&self.working_dir)
250            .env_remove("VIRTUAL_ENV")
251            .stdout(Stdio::piped())
252            .stderr(Stdio::piped())
253            .output()
254            .await
255            .context("Failed to install pytest")?;
256
257        if !output.status.success() {
258            let stderr = String::from_utf8_lossy(&output.stderr);
259            anyhow::bail!("Failed to install pytest: {}", stderr);
260        }
261
262        Ok(())
263    }
264
265    /// Run pytest and parse results
266    ///
267    /// If environment is not set up, will attempt to set it up first.
268    pub async fn run_pytest(&self, test_args: &[&str]) -> Result<TestResults> {
269        log::info!("Running pytest in {}", self.working_dir.display());
270
271        // Ensure environment is set up
272        if !self.has_pytest().await {
273            self.setup_environment().await?;
274        }
275
276        // Build pytest command
277        let mut args = vec!["run", "pytest", "-v", "--tb=short"];
278        args.extend(test_args);
279
280        let start = std::time::Instant::now();
281
282        let output = Command::new("uv")
283            .args(&args)
284            .current_dir(&self.working_dir)
285            .env_remove("VIRTUAL_ENV")
286            .stdout(Stdio::piped())
287            .stderr(Stdio::piped())
288            .output()
289            .await
290            .context("Failed to run pytest")?;
291
292        let duration_ms = start.elapsed().as_millis() as u64;
293        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
294        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
295        let combined = format!("{}\n{}", stdout, stderr);
296
297        log::debug!("pytest exit code: {:?}", output.status.code());
298        if !stdout.is_empty() {
299            log::debug!("pytest stdout:\n{}", stdout);
300        }
301
302        let mut results = self.parse_pytest_output(&combined, duration_ms);
303        results.run_succeeded = true; // We got output, run worked
304
305        // pytest exit code 5 means "no tests were collected". This is NOT
306        // a failure — it simply means the sandbox has no test files. Treat
307        // it as a vacuous pass so implementation nodes without tests are
308        // not penalized.
309        let exit_code = output.status.code();
310        if exit_code == Some(5) {
311            log::info!("pytest exit code 5 — no tests collected (vacuous pass)");
312            results.passed = 0;
313            results.failed = 0;
314            results.total = 0;
315            results.failures.clear();
316        } else if !output.status.success() {
317            force_failure_on_nonzero_exit(&mut results, "pytest", exit_code, &combined);
318        }
319
320        // Log summary
321        if results.all_passed() {
322            log::info!("✅ Tests passed: {}/{}", results.passed, results.total);
323        } else {
324            log::info!(
325                "❌ Tests failed: {} passed, {} failed",
326                results.passed,
327                results.failed
328            );
329        }
330
331        Ok(results)
332    }
333
334    /// Run pytest on specific test files
335    pub async fn run_test_files(&self, test_files: &[&Path]) -> Result<TestResults> {
336        let file_args: Vec<&str> = test_files.iter().filter_map(|p| p.to_str()).collect();
337
338        self.run_pytest(&file_args).await
339    }
340
341    /// Parse pytest output into TestResults
342    fn parse_pytest_output(&self, output: &str, duration_ms: u64) -> TestResults {
343        let mut results = TestResults {
344            duration_ms,
345            output: output.to_string(),
346            ..Default::default()
347        };
348
349        // Parse summary line: "X passed, Y failed, Z skipped in 0.12s"
350        for line in output.lines() {
351            let line = line.trim();
352
353            // Look for summary patterns (usually starts with = signs)
354            if (line.contains("passed") || line.contains("failed") || line.contains("error"))
355                && (line.contains(" in ") || line.starts_with('='))
356            {
357                let parts: Vec<&str> = line.split_whitespace().collect();
358                for i in 0..parts.len() {
359                    if parts[i] == "passed" || parts[i] == "passed," {
360                        if i > 0 {
361                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
362                                results.passed = n;
363                            }
364                        }
365                    } else if parts[i] == "failed" || parts[i] == "failed," {
366                        if i > 0 {
367                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
368                                results.failed = n;
369                            }
370                        }
371                    } else if parts[i] == "skipped" || parts[i] == "skipped," {
372                        if i > 0 {
373                            if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
374                                results.skipped = n;
375                            }
376                        }
377                    } else if (parts[i] == "error" || parts[i] == "errors") && i > 0 {
378                        if let Ok(n) = parts[i - 1].trim_matches(',').parse::<usize>() {
379                            results.failed += n;
380                        }
381                    }
382                }
383            }
384
385            // Parse individual test failures
386            // "FAILED test_file.py::TestClass::test_method - AssertionError"
387            if line.starts_with("FAILED ") {
388                let failure = self.parse_failure_line(line);
389                results.failures.push(failure);
390            }
391        }
392
393        results.total = results.passed + results.failed + results.skipped;
394        results
395    }
396
397    /// Parse a pytest FAILED line
398    fn parse_failure_line(&self, line: &str) -> TestFailure {
399        // Format: "FAILED test_file.py::TestClass::test_method - Error message"
400        let rest = line.strip_prefix("FAILED ").unwrap_or(line);
401
402        let (test_path, message) = if let Some(idx) = rest.find(" - ") {
403            (&rest[..idx], rest[idx + 3..].to_string())
404        } else {
405            (rest, String::new())
406        };
407
408        // Parse test path (file::class::method or file::method)
409        let parts: Vec<&str> = test_path.split("::").collect();
410        let (file, name) = if parts.len() >= 2 {
411            (
412                Some(parts[0].to_string()),
413                parts.last().unwrap_or(&"").to_string(),
414            )
415        } else {
416            (None, test_path.to_string())
417        };
418
419        TestFailure {
420            name,
421            file,
422            line: None,
423            message,
424            criticality: Criticality::High, // Default, will be updated by match_weighted_tests
425        }
426    }
427
428    /// Calculate V_log (Logic Energy) from test results and behavioral contract
429    /// Uses weighted tests from the contract to determine criticality
430    pub fn calculate_v_log(&self, results: &TestResults, contract: &BehavioralContract) -> f32 {
431        let gamma = contract.gamma(); // Default 2.0
432        let mut v_log = 0.0;
433
434        for failure in &results.failures {
435            // Find matching weighted test from contract
436            let weight = contract
437                .weighted_tests
438                .iter()
439                .find(|wt| {
440                    failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
441                })
442                .map(|wt| wt.criticality.weight())
443                .unwrap_or(Criticality::High.weight()); // Default to High if no match
444
445            v_log += gamma * weight;
446        }
447
448        v_log
449    }
450
451    /// Match test failures with weighted tests from contract to set criticality
452    pub fn match_weighted_tests(&self, results: &mut TestResults, contract: &BehavioralContract) {
453        for failure in &mut results.failures {
454            if let Some(wt) = contract.weighted_tests.iter().find(|wt| {
455                failure.name.contains(&wt.test_name) || wt.test_name.contains(&failure.name)
456            }) {
457                failure.criticality = wt.criticality;
458            }
459        }
460    }
461}
462
463// =============================================================================
464// PSP-5: Generic Test Runner Trait
465// =============================================================================
466
/// PSP-5: Language-agnostic test runner trait
///
/// Allows the orchestrator to run verification steps through any language's
/// toolchain without hardcoding Python paths. Implemented by the runners in
/// this module (`PythonTestRunner`, `RustTestRunner`, ...).
///
/// Failing checks are reported via the returned `TestResults`; `Err` is
/// reserved for infrastructure problems (spawn failures, etc.).
#[async_trait::async_trait]
pub trait TestRunnerTrait: Send + Sync {
    /// Run syntax/type check (e.g., `cargo check`, `uv run ty check .`)
    async fn run_syntax_check(&self) -> Result<TestResults>;

    /// Run the test suite (e.g., `cargo test`, `uv run pytest`)
    async fn run_tests(&self) -> Result<TestResults>;

    /// Run build check (e.g., `cargo build`)
    async fn run_build_check(&self) -> Result<TestResults>;

    /// Run lint check (e.g., `cargo clippy`, `uv run ruff check .`)
    ///
    /// Default: returns a no-op pass for plugins without a lint stage.
    async fn run_lint(&self) -> Result<TestResults> {
        Ok(TestResults {
            passed: 1,
            total: 1,
            run_succeeded: true,
            output: "No lint stage configured".to_string(),
            ..Default::default()
        })
    }

    /// Run a specific verifier stage by enum variant.
    ///
    /// Dispatches to the appropriate method. Convenience for generic callers.
    async fn run_stage(&self, stage: VerifierStage) -> Result<TestResults> {
        match stage {
            VerifierStage::SyntaxCheck => self.run_syntax_check().await,
            VerifierStage::Build => self.run_build_check().await,
            VerifierStage::Test => self.run_tests().await,
            VerifierStage::Lint => self.run_lint().await,
        }
    }

    /// Name of the runner (for logging)
    fn name(&self) -> &str;
}
510
#[async_trait::async_trait]
impl TestRunnerTrait for PythonTestRunner {
    /// Type-check the workspace with `ty`; pass/fail is taken from the
    /// process exit status only (no diagnostic parsing).
    async fn run_syntax_check(&self) -> Result<TestResults> {
        // Use ty via uvx (standalone tool runner) — ty is not a project
        // dependency, so `uv run ty` would fail with "Failed to spawn".
        let output = Command::new("uvx")
            .args(["ty", "check", "."])
            .current_dir(&self.working_dir)
            .env_remove("VIRTUAL_ENV")
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .context("Failed to run ty check")?;

        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
        let stderr = String::from_utf8_lossy(&output.stderr).to_string();

        // Single synthetic pass/fail entry representing the whole stage.
        Ok(TestResults {
            passed: if output.status.success() { 1 } else { 0 },
            failed: if output.status.success() { 0 } else { 1 },
            total: 1,
            run_succeeded: true,
            output: format!("{}\n{}", stdout, stderr),
            ..Default::default()
        })
    }

    /// Delegate to the detailed pytest runner (no extra args).
    async fn run_tests(&self) -> Result<TestResults> {
        self.run_pytest(&[]).await
    }

    /// Always reports a vacuous single-test pass.
    async fn run_build_check(&self) -> Result<TestResults> {
        // Python doesn't have a separate build step
        Ok(TestResults {
            passed: 1,
            total: 1,
            run_succeeded: true,
            output: "No build step for Python".to_string(),
            ..Default::default()
        })
    }

    /// Lint with ruff; pass/fail is taken from the exit status only.
    async fn run_lint(&self) -> Result<TestResults> {
        let output = Command::new("uv")
            .args(["run", "ruff", "check", "."])
            .current_dir(&self.working_dir)
            .env_remove("VIRTUAL_ENV")
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .context("Failed to run ruff check")?;

        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
        let stderr = String::from_utf8_lossy(&output.stderr).to_string();

        Ok(TestResults {
            passed: if output.status.success() { 1 } else { 0 },
            failed: if output.status.success() { 0 } else { 1 },
            total: 1,
            run_succeeded: true,
            output: format!("{}\n{}", stdout, stderr),
            ..Default::default()
        })
    }

    fn name(&self) -> &str {
        "python"
    }
}
582
/// PSP-5: Rust test runner using cargo
pub struct RustTestRunner {
    /// Working directory (workspace root) in which cargo commands run
    working_dir: PathBuf,
}
588
589impl RustTestRunner {
590    /// Create a new Rust test runner
591    pub fn new(working_dir: PathBuf) -> Self {
592        Self { working_dir }
593    }
594
595    /// Parse `cargo test` output for pass/fail counts
596    fn parse_cargo_test_output(&self, output: &str) -> TestResults {
597        let mut results = TestResults {
598            output: output.to_string(),
599            run_succeeded: true,
600            ..Default::default()
601        };
602
603        for line in output.lines() {
604            let line = line.trim();
605
606            // Parse "test result: ok. X passed; Y failed; Z ignored"
607            if line.starts_with("test result:") {
608                let parts: Vec<&str> = line.split_whitespace().collect();
609                for i in 0..parts.len() {
610                    if (parts[i] == "passed;" || parts[i] == "passed") && i > 0 {
611                        if let Ok(n) = parts[i - 1].parse::<usize>() {
612                            results.passed = n;
613                        }
614                    } else if (parts[i] == "failed;" || parts[i] == "failed") && i > 0 {
615                        if let Ok(n) = parts[i - 1].parse::<usize>() {
616                            results.failed = n;
617                        }
618                    } else if (parts[i] == "ignored;" || parts[i] == "ignored") && i > 0 {
619                        if let Ok(n) = parts[i - 1].parse::<usize>() {
620                            results.skipped = n;
621                        }
622                    }
623                }
624            }
625        }
626
627        results.total = results.passed + results.failed + results.skipped;
628        results
629    }
630}
631
#[async_trait::async_trait]
impl TestRunnerTrait for RustTestRunner {
    /// Type/borrow check via `cargo check`; only the exit status decides
    /// pass/fail, and only stderr (where cargo writes diagnostics) is kept.
    async fn run_syntax_check(&self) -> Result<TestResults> {
        let output = Command::new("cargo")
            .args(["check"])
            .current_dir(&self.working_dir)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .context("Failed to run cargo check")?;

        let stderr = String::from_utf8_lossy(&output.stderr).to_string();

        Ok(TestResults {
            passed: if output.status.success() { 1 } else { 0 },
            failed: if output.status.success() { 0 } else { 1 },
            total: 1,
            run_succeeded: true,
            output: stderr,
            ..Default::default()
        })
    }

    /// Run `cargo test` and parse per-test counts from the libtest summary.
    async fn run_tests(&self) -> Result<TestResults> {
        let output = Command::new("cargo")
            .args(["test"])
            .current_dir(&self.working_dir)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .context("Failed to run cargo test")?;

        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
        let combined = format!("{}\n{}", stdout, stderr);

        let mut results = self.parse_cargo_test_output(&combined);
        results.run_succeeded = true;
        // A compile error produces a non-zero exit with no summary line;
        // make sure that still registers as a failure.
        if !output.status.success() {
            force_failure_on_nonzero_exit(
                &mut results,
                "cargo test",
                output.status.code(),
                &combined,
            );
        }
        Ok(results)
    }

    /// Build via `cargo build`; exit status decides the single pass/fail.
    async fn run_build_check(&self) -> Result<TestResults> {
        let output = Command::new("cargo")
            .args(["build"])
            .current_dir(&self.working_dir)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .context("Failed to run cargo build")?;

        let stderr = String::from_utf8_lossy(&output.stderr).to_string();

        Ok(TestResults {
            passed: if output.status.success() { 1 } else { 0 },
            failed: if output.status.success() { 0 } else { 1 },
            total: 1,
            run_succeeded: true,
            output: stderr,
            ..Default::default()
        })
    }

    /// Lint via clippy with warnings promoted to errors (`-D warnings`).
    async fn run_lint(&self) -> Result<TestResults> {
        let output = Command::new("cargo")
            .args(["clippy", "--", "-D", "warnings"])
            .current_dir(&self.working_dir)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .context("Failed to run cargo clippy")?;

        let stderr = String::from_utf8_lossy(&output.stderr).to_string();

        Ok(TestResults {
            passed: if output.status.success() { 1 } else { 0 },
            failed: if output.status.success() { 0 } else { 1 },
            total: 1,
            run_succeeded: true,
            output: stderr,
            ..Default::default()
        })
    }

    fn name(&self) -> &str {
        "rust"
    }
}
731
732// =============================================================================
733// PSP-5 Phase 4: Plugin-Driven Verifier Runner
734// =============================================================================
735
/// Generic verifier runner driven by a plugin's `VerifierProfile`.
///
/// Instead of hardcoding language-specific commands, this runner reads the
/// profile's `VerifierCapability` entries and executes the best available
/// command (primary → fallback → skip) for each stage.
///
/// For languages with detailed output parsers (e.g., pytest, cargo test),
/// prefer the language-specific runners. `PluginVerifierRunner` is the
/// fallback for plugins that don't have a dedicated runner or when the
/// orchestrator wants uniform dispatch across all detected plugins.
pub struct PluginVerifierRunner {
    /// Working directory for command execution.
    working_dir: PathBuf,
    /// Snapshot of the plugin's verifier capabilities.
    profile: VerifierProfile,
}
752
impl PluginVerifierRunner {
    /// Create a new runner from a plugin's verifier profile.
    pub fn new(working_dir: PathBuf, profile: VerifierProfile) -> Self {
        Self {
            working_dir,
            profile,
        }
    }

    /// Execute a shell command string, returning a `TestResults`.
    ///
    /// The command is split on whitespace for arg parsing. This is
    /// intentionally simple; complex pipelines should use `sh -c`.
    ///
    /// PSP-5 Phase 4: Commands pass through policy sanitization and
    /// workspace-bound validation before execution.
    ///
    /// # Errors
    /// Fails when the policy rejects the command, the command is empty or
    /// escapes the workspace, or the process cannot be spawned. A non-zero
    /// exit is NOT an error; it is reported as a failed `TestResults`.
    async fn exec_command(&self, command: &str, stage: VerifierStage) -> Result<TestResults> {
        // Sanitize command through policy before anything touches a shell.
        let sr = perspt_policy::sanitize_command(command)?;
        if sr.rejected {
            anyhow::bail!(
                "{} command rejected by policy: {}",
                stage,
                sr.rejection_reason.unwrap_or_default()
            );
        }
        // Warnings are non-fatal: log and continue.
        for warning in &sr.warnings {
            log::warn!(
                "[{}] policy warning for {} stage: {}",
                self.profile.plugin_name,
                stage,
                warning
            );
        }

        // Validate workspace bounds
        perspt_policy::validate_workspace_bound(command, &self.working_dir)?;

        // Naive whitespace split: quoted arguments are NOT supported here.
        let parts: Vec<&str> = command.split_whitespace().collect();
        if parts.is_empty() {
            anyhow::bail!("empty command for stage {}", stage);
        }

        let program = parts[0];
        let args = &parts[1..];

        log::info!(
            "[{}] running {} stage: {}",
            self.profile.plugin_name,
            stage,
            command
        );

        let output = Command::new(program)
            .args(args)
            .current_dir(&self.working_dir)
            .env_remove("VIRTUAL_ENV")
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .output()
            .await
            .with_context(|| format!("Failed to run {} for {} stage", command, stage))?;

        let stdout = String::from_utf8_lossy(&output.stdout).to_string();
        let stderr = String::from_utf8_lossy(&output.stderr).to_string();

        // Coarse single-entry result: exit status decides pass/fail.
        Ok(TestResults {
            passed: if output.status.success() { 1 } else { 0 },
            failed: if output.status.success() { 0 } else { 1 },
            total: 1,
            run_succeeded: true,
            output: format!("{}\n{}", stdout, stderr),
            ..Default::default()
        })
    }

    /// Run a verifier stage using the profile's best available command.
    ///
    /// Returns a no-op pass if the stage is not declared or has no available tool.
    /// A declared stage whose tool is missing yields `run_succeeded: false`
    /// (degraded) rather than a failure.
    async fn run_profile_stage(&self, stage: VerifierStage) -> Result<TestResults> {
        let cap = match self.profile.get(stage) {
            Some(c) => c,
            None => {
                // Stage not declared at all: vacuous pass.
                return Ok(TestResults {
                    passed: 1,
                    total: 1,
                    run_succeeded: true,
                    output: format!(
                        "No {} stage declared for {}",
                        stage, self.profile.plugin_name
                    ),
                    ..Default::default()
                });
            }
        };

        match cap.effective_command() {
            Some(cmd) => self.exec_command(cmd, stage).await,
            None => {
                log::warn!(
                    "[{}] {} stage declared but no tool available (degraded)",
                    self.profile.plugin_name,
                    stage
                );
                Ok(TestResults {
                    passed: 0,
                    failed: 0,
                    total: 0,
                    run_succeeded: false,
                    output: format!(
                        "{} stage skipped: no tool available for {}",
                        stage, self.profile.plugin_name
                    ),
                    ..Default::default()
                })
            }
        }
    }

    /// Run all available stages in order, returning results keyed by stage.
    ///
    /// Only stages declared in the profile are executed; stages run
    /// sequentially in the fixed order syntax-check → build → test → lint.
    pub async fn run_all_stages(&self) -> Vec<(VerifierStage, Result<TestResults>)> {
        let stages = [
            VerifierStage::SyntaxCheck,
            VerifierStage::Build,
            VerifierStage::Test,
            VerifierStage::Lint,
        ];
        let mut results = Vec::new();
        for stage in stages {
            if self.profile.get(stage).is_some() {
                results.push((stage, self.run_profile_stage(stage).await));
            }
        }
        results
    }

    /// Get the underlying profile.
    pub fn profile(&self) -> &VerifierProfile {
        &self.profile
    }
}
894
895#[async_trait::async_trait]
896impl TestRunnerTrait for PluginVerifierRunner {
897    async fn run_syntax_check(&self) -> Result<TestResults> {
898        self.run_profile_stage(VerifierStage::SyntaxCheck).await
899    }
900
901    async fn run_tests(&self) -> Result<TestResults> {
902        self.run_profile_stage(VerifierStage::Test).await
903    }
904
905    async fn run_build_check(&self) -> Result<TestResults> {
906        self.run_profile_stage(VerifierStage::Build).await
907    }
908
909    async fn run_lint(&self) -> Result<TestResults> {
910        self.run_profile_stage(VerifierStage::Lint).await
911    }
912
913    fn name(&self) -> &str {
914        &self.profile.plugin_name
915    }
916}
917
918/// PSP-5: Factory function to create a test runner for a given plugin
919pub fn test_runner_for_plugin(plugin_name: &str, working_dir: PathBuf) -> Box<dyn TestRunnerTrait> {
920    match plugin_name {
921        "rust" => Box::new(RustTestRunner::new(working_dir)),
922        "python" => Box::new(PythonTestRunner::new(working_dir)),
923        _ => Box::new(PythonTestRunner::new(working_dir)), // Default fallback
924    }
925}
926
927/// PSP-5 Phase 4: Create a runner from a verifier profile.
928///
929/// For Rust and Python, this returns the specialised runner (which has
930/// detailed output parsing). For anything else it returns a generic
931/// `PluginVerifierRunner` that executes whatever commands the profile declares.
932pub fn test_runner_for_profile(
933    profile: VerifierProfile,
934    working_dir: PathBuf,
935) -> Box<dyn TestRunnerTrait> {
936    match profile.plugin_name.as_str() {
937        "rust" => Box::new(RustTestRunner::new(working_dir)),
938        "python" => Box::new(PythonTestRunner::new(working_dir)),
939        _ => Box::new(PluginVerifierRunner::new(working_dir, profile)),
940    }
941}
942
/// Backward-compatibility alias: `TestRunner` resolves to [`PythonTestRunner`]
/// so existing call sites keep compiling.
pub type TestRunner = PythonTestRunner;
945
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::WeightedTest;
    use perspt_core::plugin::{
        LanguagePlugin, LspCapability, LspConfig, VerifierCapability, VerifierProfile,
    };

    #[test]
    fn test_parse_pytest_summary() {
        // The pytest summary line yields per-category counts; total is derived.
        let runner = PythonTestRunner::new(PathBuf::from("."));

        let output = "===== 3 passed, 2 failed, 1 skipped in 0.12s =====";
        let results = runner.parse_pytest_output(output, 120);

        assert_eq!(results.passed, 3);
        assert_eq!(results.failed, 2);
        assert_eq!(results.skipped, 1);
        assert_eq!(results.total, 6);
    }

    #[test]
    fn test_parse_pytest_failure_line() {
        // A pytest FAILED line is split into test name, file, and message.
        let runner = PythonTestRunner::new(PathBuf::from("."));

        let line = "FAILED test_calculator.py::TestDivide::test_divide_by_zero - ZeroDivisionError";
        let failure = runner.parse_failure_line(line);

        assert_eq!(failure.name, "test_divide_by_zero");
        assert_eq!(failure.file, Some("test_calculator.py".to_string()));
        assert!(failure.message.contains("ZeroDivisionError"));
    }

    #[test]
    fn test_force_failure_on_nonzero_exit_marks_failure() {
        // A nonzero exit with zero parsed tests must still register a failure
        // (infrastructure errors like collection failures must not pass).
        let mut results = TestResults::default();

        force_failure_on_nonzero_exit(&mut results, "pytest", Some(2), "collection error");

        assert_eq!(results.failed, 1);
        assert_eq!(results.total, 1);
        assert_eq!(results.failures.len(), 1);
        assert!(results.failures[0].message.contains("collection error"));
    }

    #[test]
    fn test_calculate_v_log() {
        let runner = PythonTestRunner::new(PathBuf::from("."));

        // One failing test marked Critical in the behavioral contract.
        let results = TestResults {
            failures: vec![TestFailure {
                name: "test_critical_feature".to_string(),
                file: None,
                line: None,
                message: String::new(),
                criticality: Criticality::Critical,
            }],
            ..Default::default()
        };

        let mut contract = BehavioralContract::new();
        contract.weighted_tests = vec![WeightedTest {
            test_name: "test_critical_feature".to_string(),
            criticality: Criticality::Critical,
        }];

        let v_log = runner.calculate_v_log(&results, &contract);
        // gamma (2.0) * Critical weight (10.0) = 20.0
        assert!((v_log - 20.0).abs() < 0.01);
    }

    #[test]
    fn test_parse_cargo_test_output() {
        // Cargo counts: ok -> passed, FAILED -> failed, ignored -> skipped.
        let runner = RustTestRunner::new(PathBuf::from("."));

        let output = r#"
running 5 tests
test tests::test_add ... ok
test tests::test_sub ... ok
test tests::test_mul ... FAILED
test tests::test_div ... ok
test tests::test_rem ... ignored

test result: ok. 3 passed; 1 failed; 1 ignored; 0 measured; 0 filtered out
"#;
        let results = runner.parse_cargo_test_output(output);
        assert_eq!(results.passed, 3);
        assert_eq!(results.failed, 1);
        assert_eq!(results.skipped, 1);
        assert_eq!(results.total, 5);
    }

    #[test]
    fn test_runner_for_plugin_factory() {
        let rust_runner = test_runner_for_plugin("rust", PathBuf::from("."));
        assert_eq!(rust_runner.name(), "rust");

        let python_runner = test_runner_for_plugin("python", PathBuf::from("."));
        assert_eq!(python_runner.name(), "python");

        // Unknown falls back to Python
        let fallback = test_runner_for_plugin("go", PathBuf::from("."));
        assert_eq!(fallback.name(), "python");
    }

    // =========================================================================
    // PluginVerifierRunner tests
    // =========================================================================

    /// Build a minimal `VerifierProfile` with the given capabilities and a
    /// stub LSP config whose server is marked unavailable.
    fn make_test_profile(name: &str, caps: Vec<VerifierCapability>) -> VerifierProfile {
        VerifierProfile {
            plugin_name: name.to_string(),
            capabilities: caps,
            lsp: LspCapability {
                primary: LspConfig {
                    server_binary: "test-ls".to_string(),
                    args: vec![],
                    language_id: name.to_string(),
                },
                primary_available: false,
                fallback: None,
                fallback_available: false,
            },
        }
    }

    #[test]
    fn test_plugin_verifier_runner_name() {
        let profile = make_test_profile("go", vec![]);
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        assert_eq!(runner.name(), "go");
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_no_stage_declared() {
        // When no capability is declared for a stage, run_stage returns a no-op pass
        let profile = make_test_profile("go", vec![]);
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let result = runner.run_syntax_check().await.unwrap();
        assert_eq!(result.passed, 1);
        assert_eq!(result.total, 1);
        assert!(result.output.contains("No syntax_check stage"));
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_no_tool_available() {
        // Stage is declared but neither primary nor fallback tool is available
        let profile = make_test_profile(
            "go",
            vec![VerifierCapability {
                stage: VerifierStage::Build,
                command: Some("go build ./...".to_string()),
                available: false,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let result = runner.run_build_check().await.unwrap();
        assert!(!result.run_succeeded);
        assert!(result.output.contains("no tool available"));
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_echo_command() {
        // Use `echo` as a trivially-available command to test real execution
        let profile = make_test_profile(
            "echo-lang",
            vec![VerifierCapability {
                stage: VerifierStage::SyntaxCheck,
                command: Some("echo syntax-ok".to_string()),
                available: true,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let result = runner.run_syntax_check().await.unwrap();
        assert_eq!(result.passed, 1);
        assert!(result.run_succeeded);
        assert!(result.output.contains("syntax-ok"));
    }

    #[tokio::test]
    async fn test_plugin_verifier_runner_run_all_stages() {
        let profile = make_test_profile(
            "echo-lang",
            vec![
                VerifierCapability {
                    stage: VerifierStage::SyntaxCheck,
                    command: Some("echo check".to_string()),
                    available: true,
                    fallback_command: None,
                    fallback_available: false,
                },
                VerifierCapability {
                    stage: VerifierStage::Lint,
                    command: Some("echo lint".to_string()),
                    available: true,
                    fallback_command: None,
                    fallback_available: false,
                },
            ],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("."), profile);
        let results = runner.run_all_stages().await;
        // Only the 2 declared stages should appear
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, VerifierStage::SyntaxCheck);
        assert_eq!(results[1].0, VerifierStage::Lint);
        assert!(results[0].1.is_ok());
        assert!(results[1].1.is_ok());
    }

    #[test]
    fn test_runner_for_profile_factory() {
        use perspt_core::plugin::RustPlugin;
        // Known plugins get specialised runners
        let rust_profile = RustPlugin.verifier_profile();
        let runner = test_runner_for_profile(rust_profile, PathBuf::from("."));
        assert_eq!(runner.name(), "rust");

        // Unknown plugins get PluginVerifierRunner
        let custom = make_test_profile("go", vec![]);
        let runner = test_runner_for_profile(custom, PathBuf::from("."));
        assert_eq!(runner.name(), "go");
    }

    #[tokio::test]
    async fn test_exec_command_rejects_dangerous_pattern() {
        let profile = make_test_profile(
            "danger",
            vec![VerifierCapability {
                stage: VerifierStage::SyntaxCheck,
                command: Some("rm -rf /".to_string()),
                available: true,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("/tmp"), profile);
        let result = runner.run_syntax_check().await;
        // The command should be rejected by policy sanitisation
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_exec_command_rejects_workspace_escape() {
        let profile = make_test_profile(
            "escape",
            vec![VerifierCapability {
                stage: VerifierStage::SyntaxCheck,
                command: Some("cat /etc/passwd".to_string()),
                available: true,
                fallback_command: None,
                fallback_available: false,
            }],
        );
        let runner = PluginVerifierRunner::new(PathBuf::from("/home/user/project"), profile);
        let result = runner.run_syntax_check().await;
        // The command references a path outside the workspace
        assert!(result.is_err());
    }

    #[test]
    fn test_fallback_command_selected_when_primary_unavailable() {
        // Primary tool missing + fallback present => fallback is chosen.
        let cap = VerifierCapability {
            stage: VerifierStage::Test,
            command: Some("uv run pytest".to_string()),
            available: false,
            fallback_command: Some("python -m pytest".to_string()),
            fallback_available: true,
        };
        assert_eq!(cap.effective_command(), Some("python -m pytest"));
    }
}