Skip to main content

perspt_core/
path.rs

//! Canonical path resolution for artifact paths.
//!
//! Provides a single normalization function that all path consumers share:
//! bundle validation, ownership manifest lookups, sandbox copy, policy checks,
//! and commit reconciliation.  This ensures that `src/main.rs`, `./src/main.rs`,
//! `src/../src/main.rs`, and `src/./main.rs` all resolve to the same identity.
//!
//! Paths are always workspace-relative.  Absolute paths and traversals that
//! escape the workspace root are rejected.
10
11use std::path::{Component, PathBuf};
12
/// Reasons a raw artifact path can be refused by normalization.
///
/// Every variant except [`PathError::Empty`] carries the offending input so
/// callers can surface it in diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathError {
    /// Nothing is left of the path once it has been normalized.
    Empty,
    /// The path is anchored at a root (`/`) or at a drive letter.
    Absolute(String),
    /// `..` components would climb above the workspace root.
    Escapes(String),
    /// The path holds something that can never be valid, e.g. a NUL byte.
    Invalid(String),
}

impl std::fmt::Display for PathError {
    /// Renders a short human-readable message, quoting the rejected path
    /// where one is available.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Empty => f.write_str("path is empty"),
            Self::Absolute(path) => write!(f, "path is absolute: '{}'", path),
            Self::Escapes(path) => write!(f, "path escapes workspace root: '{}'", path),
            Self::Invalid(path) => write!(f, "path contains invalid components: '{}'", path),
        }
    }
}

// No underlying cause to expose, so the default `Error` methods suffice.
impl std::error::Error for PathError {}
38
39/// Normalize a workspace-relative artifact path to its canonical form.
40///
41/// Resolves `.` and `..` components, strips redundant separators, and
42/// converts backslashes to forward slashes.  The result is a clean
43/// relative path suitable for use as a map key and file identity.
44///
45/// # Errors
46///
47/// Returns `PathError` if the path is empty, absolute, or escapes the
48/// workspace root (net `..` depth goes below zero).
49///
50/// # Examples
51///
52/// ```
53/// use perspt_core::path::normalize_artifact_path;
54///
55/// assert_eq!(normalize_artifact_path("src/main.rs").unwrap(), "src/main.rs");
56/// assert_eq!(normalize_artifact_path("./src/main.rs").unwrap(), "src/main.rs");
57/// assert_eq!(normalize_artifact_path("src/../src/main.rs").unwrap(), "src/main.rs");
58/// assert_eq!(normalize_artifact_path("src/./main.rs").unwrap(), "src/main.rs");
59/// assert!(normalize_artifact_path("../escape.rs").is_err());
60/// assert!(normalize_artifact_path("/absolute/path").is_err());
61/// ```
62pub fn normalize_artifact_path(raw: &str) -> Result<String, PathError> {
63    if raw.is_empty() {
64        return Err(PathError::Empty);
65    }
66
67    // Null bytes are never valid in paths
68    if raw.contains('\0') {
69        return Err(PathError::Invalid(raw.to_string()));
70    }
71
72    // PSP-7: Strip surrounding backticks, quotes, and markdown formatting
73    // that LLMs often wrap around file paths.
74    let stripped = raw
75        .trim()
76        .trim_matches('`')
77        .trim_matches('"')
78        .trim_matches('\'')
79        .trim_start_matches("**")
80        .trim_end_matches("**")
81        .trim();
82
83    if stripped.is_empty() {
84        return Err(PathError::Empty);
85    }
86
87    // Normalize backslashes before parsing
88    let normalized = stripped.replace('\\', "/");
89    let p = std::path::Path::new(&normalized);
90
91    // Reject absolute paths early
92    if p.is_absolute() || normalized.starts_with('/') {
93        return Err(PathError::Absolute(raw.to_string()));
94    }
95
96    // Windows drive prefix check
97    let bytes = normalized.as_bytes();
98    if bytes.len() >= 2 && bytes[1] == b':' && bytes[0].is_ascii_alphabetic() {
99        return Err(PathError::Absolute(raw.to_string()));
100    }
101
102    // Resolve components, tracking depth to detect escapes
103    let mut components: Vec<String> = Vec::new();
104    let mut depth: i32 = 0;
105
106    for component in p.components() {
107        match component {
108            Component::Normal(s) => {
109                let s = s.to_string_lossy().to_string();
110                components.push(s);
111                depth += 1;
112            }
113            Component::ParentDir => {
114                if depth <= 0 {
115                    return Err(PathError::Escapes(raw.to_string()));
116                }
117                components.pop();
118                depth -= 1;
119            }
120            Component::CurDir => {
121                // Skip `.` components
122            }
123            Component::RootDir | Component::Prefix(_) => {
124                return Err(PathError::Absolute(raw.to_string()));
125            }
126        }
127    }
128
129    let result: PathBuf = components.iter().collect();
130    let result_str = result.to_string_lossy().to_string();
131
132    // Normalize to forward slashes in the output
133    let result_str = result_str.replace('\\', "/");
134
135    if result_str.is_empty() {
136        return Err(PathError::Empty);
137    }
138
139    Ok(result_str)
140}
141
142/// Normalize a path for use as a map key (ownership manifest, bundle dedup).
143///
144/// Thin wrapper around `normalize_artifact_path` that returns `None` on
145/// error instead of `Err`.  Callers that want diagnostics should use
146/// `normalize_artifact_path` directly.
147pub fn normalize_path_key(raw: &str) -> Option<String> {
148    normalize_artifact_path(raw).ok()
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` normalizes successfully to exactly `expected`.
    #[track_caller]
    fn assert_normalizes_to(input: &str, expected: &str) {
        assert_eq!(normalize_artifact_path(input).as_deref(), Ok(expected));
    }

    // Successful normalization

    #[test]
    fn test_simple_relative_path() {
        assert_normalizes_to("src/main.rs", "src/main.rs");
    }

    #[test]
    fn test_dot_prefix_stripped() {
        assert_normalizes_to("./src/main.rs", "src/main.rs");
    }

    #[test]
    fn test_redundant_parent_resolved() {
        assert_normalizes_to("src/../src/main.rs", "src/main.rs");
    }

    #[test]
    fn test_dot_in_middle_stripped() {
        assert_normalizes_to("src/./main.rs", "src/main.rs");
    }

    #[test]
    fn test_multiple_slashes_normalized() {
        assert_normalizes_to("src///main.rs", "src/main.rs");
    }

    #[test]
    fn test_backslash_normalized() {
        assert_normalizes_to("src\\lib\\mod.rs", "src/lib/mod.rs");
    }

    #[test]
    fn test_trailing_slash_preserved_as_dir() {
        // The trailing slash itself is dropped; only the directory path remains.
        assert_normalizes_to("src/lib/", "src/lib");
    }

    // Rejected inputs

    #[test]
    fn test_empty_path_rejected() {
        assert_eq!(normalize_artifact_path("").unwrap_err(), PathError::Empty);
    }

    #[test]
    fn test_absolute_unix_rejected() {
        let outcome = normalize_artifact_path("/etc/passwd");
        assert!(matches!(outcome, Err(PathError::Absolute(_))));
    }

    #[test]
    fn test_absolute_windows_rejected() {
        let outcome = normalize_artifact_path("C:\\Windows\\file.txt");
        assert!(matches!(outcome, Err(PathError::Absolute(_))));
    }

    #[test]
    fn test_escape_via_dotdot_rejected() {
        let outcome = normalize_artifact_path("../escape.rs");
        assert!(matches!(outcome, Err(PathError::Escapes(_))));
    }

    #[test]
    fn test_deep_escape_rejected() {
        let outcome = normalize_artifact_path("a/b/../../../../escape");
        assert!(matches!(outcome, Err(PathError::Escapes(_))));
    }

    #[test]
    fn test_dotdot_that_stays_inside() {
        assert_normalizes_to("a/b/../c/file.rs", "a/c/file.rs");
    }

    #[test]
    fn test_null_byte_rejected() {
        let outcome = normalize_artifact_path("src/\0bad.rs");
        assert!(matches!(outcome, Err(PathError::Invalid(_))));
    }

    #[test]
    fn test_just_dot_is_empty() {
        assert_eq!(normalize_artifact_path(".").unwrap_err(), PathError::Empty);
    }

    // Option-returning wrapper

    #[test]
    fn test_normalize_path_key_returns_none_on_error() {
        for rejected in ["", "/abs", "../escape"] {
            assert_eq!(normalize_path_key(rejected), None);
        }
    }

    #[test]
    fn test_normalize_path_key_returns_some_on_success() {
        let key = normalize_path_key("./src/main.rs");
        assert_eq!(key.as_deref(), Some("src/main.rs"));
    }

    // PSP-7 regression tests

    #[test]
    fn test_backtick_wrapped_path() {
        assert_normalizes_to("`src/main.rs`", "src/main.rs");
    }

    #[test]
    fn test_double_quoted_path() {
        assert_normalizes_to("\"src/main.rs\"", "src/main.rs");
    }

    #[test]
    fn test_single_quoted_path() {
        assert_normalizes_to("'src/main.rs'", "src/main.rs");
    }

    #[test]
    fn test_bold_markdown_path() {
        assert_normalizes_to("**src/main.rs**", "src/main.rs");
    }

    #[test]
    fn test_backtick_with_dot_prefix() {
        assert_normalizes_to("`./src/lib.rs`", "src/lib.rs");
    }

    #[test]
    fn test_only_backticks_is_empty() {
        assert_eq!(normalize_artifact_path("``").unwrap_err(), PathError::Empty);
    }
}