Skip to main content

perspt_policy/
sanitize.rs

1//! Command Sanitization
2//!
3//! Parses and validates shell commands to detect dangerous patterns.
4
5use anyhow::Result;
6use shell_words;
7
/// Returns `true` when the second character of `part` is a colon, which is
/// how a Windows drive prefix such as `C:` appears in an argument.
///
/// The first character is not inspected, so any `x:`-shaped prefix counts.
fn has_windows_drive_prefix(part: &str) -> bool {
    // Work on chars (not bytes) so a multi-byte first character is skipped whole.
    let mut characters = part.chars().skip(1);
    matches!(characters.next(), Some(':'))
}
13
14fn looks_like_path_argument(part: &str) -> bool {
15    part.contains('/') || part.contains('\\') || has_windows_drive_prefix(part)
16}
17
18fn is_explicit_absolute_path(part: &str, candidate: &std::path::Path) -> bool {
19    candidate.is_absolute()
20        || part.starts_with('/')
21        || part.starts_with('\\')
22        || has_windows_drive_prefix(part)
23}
24
/// Outcome of running a command through the sanitizer.
///
/// A command is either accepted (possibly with `warnings`) or `rejected`,
/// in which case `rejection_reason` explains why.
#[derive(Clone, Debug)]
pub struct SanitizeResult {
    /// Tokens the command was split into
    pub parts: Vec<String>,
    /// Human-readable notes about risky constructs found in the command
    pub warnings: Vec<String>,
    /// `true` when the command must not be executed
    pub rejected: bool,
    /// Explanation accompanying a rejection; `None` otherwise
    pub rejection_reason: Option<String>,
}
37
38/// Sanitize a command string
39///
40/// Parses the command and checks for:
41/// - Subshell expansion (backticks, $())
42/// - Command chaining (&&, ||, ;)
43/// - Redirections to sensitive paths
44/// - Network access without acknowledgment
45pub fn sanitize_command(command: &str) -> Result<SanitizeResult> {
46    let mut result = SanitizeResult {
47        parts: Vec::new(),
48        warnings: Vec::new(),
49        rejected: false,
50        rejection_reason: None,
51    };
52
53    // Parse using shell-words
54    match shell_words::split(command) {
55        Ok(parts) => {
56            result.parts = parts;
57        }
58        Err(e) => {
59            result.rejected = true;
60            result.rejection_reason = Some(format!("Failed to parse command: {}", e));
61            return Ok(result);
62        }
63    }
64
65    // Check for backtick subshell expansion
66    if command.contains('`') {
67        result
68            .warnings
69            .push("Command contains backtick subshell expansion".to_string());
70    }
71
72    // Check for $() subshell expansion
73    if command.contains("$(") {
74        result
75            .warnings
76            .push("Command contains $() subshell expansion".to_string());
77    }
78
79    // Check for command chaining (if not in quotes)
80    let dangerous_chains = ["&&", "||", ";"];
81    for chain in &dangerous_chains {
82        // Simple check - a more robust implementation would respect quoting
83        if command.contains(chain) {
84            result
85                .warnings
86                .push(format!("Command contains chaining operator: {}", chain));
87        }
88    }
89
90    // Check for redirections to sensitive paths
91    let sensitive_paths = ["/etc/", "/root/", "~/.ssh/", "/dev/", "/proc/", "/sys/"];
92
93    for path in &sensitive_paths {
94        if command.contains(&format!("> {}", path))
95            || command.contains(&format!(">> {}", path))
96            || command.contains(&format!("< {}", path))
97        {
98            result.warnings.push(format!(
99                "Command redirects to/from sensitive path: {}",
100                path
101            ));
102        }
103    }
104
105    // Check for destructive patterns
106    let destructive_patterns = [
107        ("rm -rf /", "Recursive delete of root"),
108        ("rm -rf /*", "Recursive delete of root contents"),
109        ("rm -rf ~", "Recursive delete of home directory"),
110        (":(){:|:&};:", "Fork bomb"),
111        ("mkfs", "Filesystem creation"),
112        ("dd if=/dev/zero", "Disk overwrite"),
113        ("> /dev/sda", "Direct disk write"),
114    ];
115
116    for (pattern, description) in &destructive_patterns {
117        if command.contains(pattern) {
118            result.rejected = true;
119            result.rejection_reason = Some(format!(
120                "Dangerous pattern detected: {} ({})",
121                pattern, description
122            ));
123            return Ok(result);
124        }
125    }
126
127    Ok(result)
128}
129
/// Produce a canonical, display-ready form of a command.
///
/// The command is tokenized and the tokens are joined back together, which
/// normalizes spacing so that visually padded commands render uniformly.
/// Fails when the command cannot be tokenized.
#[cfg(test)]
pub(crate) fn canonicalize(command: &str) -> Result<String> {
    shell_words::split(command)
        .map(|tokens| shell_words::join(&tokens))
        .map_err(anyhow::Error::from)
}
139
140/// Validate that a command is workspace-bound.
141///
142/// Checks parsed command parts for absolute paths that escape the given
143/// workspace root.  Returns `Ok(())` when all path-like arguments resolve
144/// inside the workspace, or an error describing the violation.
145pub fn validate_workspace_bound(command: &str, workspace_root: &std::path::Path) -> Result<()> {
146    // On Windows, normalize backslash path separators to forward slashes
147    // before POSIX-style shell tokenization (`shell_words` treats `\` as
148    // an escape character, which mangles Windows paths).
149    let normalized;
150    let command_for_parse = if cfg!(windows) {
151        normalized = command.replace('\\', "/");
152        &normalized
153    } else {
154        command
155    };
156    let parts = shell_words::split(command_for_parse)?;
157
158    for part in &parts {
159        // Skip flags and non-path arguments
160        if part.starts_with('-') || !looks_like_path_argument(part) {
161            continue;
162        }
163
164        let candidate = std::path::Path::new(part);
165        if is_explicit_absolute_path(part, candidate) {
166            // Absolute path — must be inside workspace
167            if !candidate.starts_with(workspace_root) {
168                anyhow::bail!(
169                    "command references path outside workspace: {} (workspace: {})",
170                    part,
171                    workspace_root.display()
172                );
173            }
174        } else if part.contains("..") {
175            // First: logical check that catches traversal even when the
176            // target path doesn't exist on disk yet.
177            if perspt_core::path::normalize_artifact_path(part).is_err() {
178                anyhow::bail!(
179                    "command contains path that escapes workspace root: {} (workspace: {})",
180                    part,
181                    workspace_root.display()
182                );
183            }
184            // Second: filesystem-level check for paths that do exist.
185            let resolved = workspace_root.join(candidate);
186            if let Ok(canonical) = resolved.canonicalize() {
187                if !canonical.starts_with(workspace_root) {
188                    anyhow::bail!(
189                        "command escapes workspace via '..': {} resolves to {} (workspace: {})",
190                        part,
191                        canonical.display(),
192                        workspace_root.display()
193                    );
194                }
195            }
196        }
197    }
198
199    Ok(())
200}
201
202/// Validate that an artifact path is safe for a destructive operation
203/// (delete or move).
204///
205/// Beyond the standard path-traversal and absolute-path checks already
206/// performed by `ArtifactBundle::validate()`, this adds domain-level
207/// guards that prevent accidental loss of critical project files.
208pub fn validate_artifact_mutation(
209    path: &str,
210    workspace_root: &std::path::Path,
211    operation: &str,
212) -> Result<()> {
213    // 1. Canonical path check via perspt_core::path
214    perspt_core::path::normalize_artifact_path(path)
215        .map_err(|e| anyhow::anyhow!("{} rejected for {}: {}", operation, path, e))?;
216
217    // 2. Protect critical project root files
218    let protected: &[&str] = &[
219        "Cargo.toml",
220        "Cargo.lock",
221        "pyproject.toml",
222        "package.json",
223        "package-lock.json",
224        ".gitignore",
225        ".git",
226    ];
227
228    let normalized = path.replace('\\', "/");
229    let basename = normalized.rsplit('/').next().unwrap_or(&normalized);
230
231    // Only protect at root level (not nested e.g. crates/foo/Cargo.toml)
232    if !normalized.contains('/') && protected.contains(&basename) {
233        anyhow::bail!(
234            "{} rejected: '{}' is a protected project root file",
235            operation,
236            path
237        );
238    }
239
240    // 3. Reject entire top-level directories (e.g. "src", "crates")
241    let resolved = workspace_root.join(path);
242    if resolved.is_dir() && !normalized.contains('/') {
243        anyhow::bail!(
244            "{} rejected: '{}' is a top-level directory; specify individual files",
245            operation,
246            path
247        );
248    }
249
250    Ok(())
251}
252
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// POSIX-style workspace root shared by the platform-independent tests.
    fn posix_workspace() -> PathBuf {
        PathBuf::from("/home/user/project")
    }

    // --- sanitize_command -------------------------------------------------

    #[test]
    fn test_safe_command() {
        let outcome = sanitize_command("cargo build --release").unwrap();
        assert!(!outcome.rejected);
        assert!(outcome.warnings.is_empty());
    }

    #[test]
    fn test_dangerous_command_rejected() {
        let outcome = sanitize_command("rm -rf /").unwrap();
        assert!(outcome.rejected);
    }

    #[test]
    fn test_subshell_warning() {
        let outcome = sanitize_command("echo $(whoami)").unwrap();
        assert!(!outcome.warnings.is_empty());
    }

    #[test]
    fn test_chaining_warning() {
        let outcome = sanitize_command("ls && rm file").unwrap();
        assert!(!outcome.warnings.is_empty());
    }

    // --- canonicalize -----------------------------------------------------

    #[test]
    fn test_canonicalize() {
        let display_form = canonicalize("ls   -la    /tmp").unwrap();
        assert_eq!(display_form, "ls -la /tmp");
    }

    // --- validate_workspace_bound ----------------------------------------

    #[test]
    fn test_workspace_bound_relative_safe() {
        let root = posix_workspace();
        assert!(validate_workspace_bound("cargo build", &root).is_ok());
    }

    #[test]
    fn test_workspace_bound_absolute_inside() {
        // Use the path style of the platform the test runs on.
        let (root, command) = match cfg!(windows) {
            true => (
                r"C:\Users\user\project",
                r"cat C:\Users\user\project\src\main.rs",
            ),
            false => ("/home/user/project", "cat /home/user/project/src/main.rs"),
        };
        let root = PathBuf::from(root);

        assert!(validate_workspace_bound(command, &root).is_ok());
    }

    #[test]
    fn test_workspace_bound_absolute_outside_rejected() {
        // Use the path style of the platform the test runs on.
        let (root, command) = match cfg!(windows) {
            true => (
                r"C:\Users\user\project",
                r"cat C:\Windows\System32\drivers\etc\hosts",
            ),
            false => ("/home/user/project", "cat /etc/passwd"),
        };
        let root = PathBuf::from(root);

        let error = validate_workspace_bound(command, &root)
            .expect_err("path outside the workspace must be rejected");
        assert!(error.to_string().contains("outside workspace"));
    }

    #[test]
    fn test_workspace_bound_flags_ignored() {
        let root = posix_workspace();
        assert!(validate_workspace_bound("cargo build --release", &root).is_ok());
    }

    // --- validate_artifact_mutation --------------------------------------

    #[test]
    fn test_artifact_mutation_normal_file_allowed() {
        let root = std::env::temp_dir();
        let verdict = validate_artifact_mutation("src/main.rs", &root, "Delete");
        assert!(verdict.is_ok());
    }

    #[test]
    fn test_artifact_mutation_nested_cargo_toml_allowed() {
        let root = std::env::temp_dir();
        let verdict = validate_artifact_mutation("crates/foo/Cargo.toml", &root, "Delete");
        assert!(verdict.is_ok());
    }

    #[test]
    fn test_artifact_mutation_root_cargo_toml_rejected() {
        let root = std::env::temp_dir();
        let error = validate_artifact_mutation("Cargo.toml", &root, "Delete")
            .expect_err("root Cargo.toml must be protected");
        assert!(error.to_string().contains("protected"));
    }

    #[test]
    fn test_artifact_mutation_gitignore_rejected() {
        let root = std::env::temp_dir();
        let verdict = validate_artifact_mutation(".gitignore", &root, "Delete");
        assert!(verdict.is_err());
    }

    #[test]
    fn test_artifact_mutation_traversal_rejected() {
        let root = std::env::temp_dir();
        let verdict = validate_artifact_mutation("../etc/passwd", &root, "Move");
        assert!(verdict.is_err());
    }
}
372}