1use anyhow::Result;
7use grep::regex::RegexMatcher;
8use grep::searcher::sinks::UTF8;
9use grep::searcher::Searcher;
10use ignore::WalkBuilder;
11use std::path::{Path, PathBuf};
12
/// One matching line produced by `ContextRetriever::search`.
#[derive(Clone, Debug)]
pub struct SearchHit {
    /// File containing the match; `search` stores it relative to the
    /// retriever's working directory when the prefix can be stripped.
    pub file: PathBuf,
    /// Line number of the match as reported by the searcher.
    pub line: u32,
    /// Text of the matching line (`search` trims trailing whitespace).
    pub content: String,
    /// Column of the match, if known. `search` always leaves this `None`.
    pub column: Option<usize>,
}
25
/// Collects source-code context (search hits, file contents, digests) from a
/// project directory while respecting byte budgets.
pub struct ContextRetriever {
    /// Root directory: searches walk it and relative paths are joined onto it.
    working_dir: PathBuf,
    /// Maximum number of bytes of a file kept by `read_file_truncated`.
    max_file_bytes: usize,
    /// Byte budget used when assembling context from several files.
    max_context_bytes: usize,
}
35
36impl ContextRetriever {
37 pub fn new(working_dir: PathBuf) -> Self {
39 Self {
40 working_dir,
41 max_file_bytes: 50 * 1024, max_context_bytes: 100 * 1024, }
44 }
45
46 pub fn with_max_file_bytes(mut self, bytes: usize) -> Self {
48 self.max_file_bytes = bytes;
49 self
50 }
51
52 pub fn with_max_context_bytes(mut self, bytes: usize) -> Self {
54 self.max_context_bytes = bytes;
55 self
56 }
57
58 pub fn search(&self, pattern: &str, max_results: usize) -> Vec<SearchHit> {
61 let mut hits = Vec::new();
62
63 let matcher = match RegexMatcher::new(pattern) {
65 Ok(m) => m,
66 Err(e) => {
67 log::warn!("Invalid search pattern '{}': {}", pattern, e);
68 return hits;
69 }
70 };
71
72 let walker = WalkBuilder::new(&self.working_dir)
74 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .build();
79
80 let mut searcher = Searcher::new();
81
82 for entry in walker.flatten() {
83 if hits.len() >= max_results {
84 break;
85 }
86
87 let path = entry.path();
88
89 if !path.is_file() {
91 continue;
92 }
93
94 if Self::is_binary_extension(path) {
96 continue;
97 }
98
99 let _ = searcher.search_path(
101 &matcher,
102 path,
103 UTF8(|line_num, line| {
104 if hits.len() < max_results {
105 let relative_path = path
106 .strip_prefix(&self.working_dir)
107 .unwrap_or(path)
108 .to_path_buf();
109
110 hits.push(SearchHit {
111 file: relative_path,
112 line: line_num as u32,
113 content: line.trim_end().to_string(),
114 column: None,
115 });
116 }
117 Ok(hits.len() < max_results)
118 }),
119 );
120 }
121
122 hits
123 }
124
125 pub fn read_file_truncated(&self, path: &Path) -> Result<String> {
127 let full_path = if path.is_absolute() {
128 path.to_path_buf()
129 } else {
130 self.working_dir.join(path)
131 };
132
133 let content = std::fs::read_to_string(&full_path)?;
134
135 if content.len() > self.max_file_bytes {
136 let truncated = &content[..self.max_file_bytes];
137 let last_newline = truncated.rfind('\n').unwrap_or(self.max_file_bytes);
139 Ok(format!(
140 "{}\n\n... [truncated, {} more bytes]",
141 &content[..last_newline],
142 content.len() - last_newline
143 ))
144 } else {
145 Ok(content)
146 }
147 }
148
149 pub fn get_task_context(&self, context_files: &[PathBuf], output_files: &[PathBuf]) -> String {
152 let mut context = String::new();
153 let mut remaining_budget = self.max_context_bytes;
154
155 if !context_files.is_empty() {
157 context.push_str("## Context Files (for reference)\n\n");
158 for file in context_files {
159 if remaining_budget == 0 {
160 break;
161 }
162 if let Ok(content) = self.read_file_truncated(file) {
163 let section = format!("### {}\n```\n{}\n```\n\n", file.display(), content);
164 if section.len() <= remaining_budget {
165 remaining_budget -= section.len();
166 context.push_str(§ion);
167 }
168 }
169 }
170 }
171
172 if !output_files.is_empty() {
174 context.push_str("## Target Files (to modify)\n\n");
175 for file in output_files {
176 if remaining_budget == 0 {
177 break;
178 }
179 let full_path = self.working_dir.join(file);
180 if full_path.exists() {
181 if let Ok(content) = self.read_file_truncated(file) {
182 let section = format!(
183 "### {} (current content)\n```\n{}\n```\n\n",
184 file.display(),
185 content
186 );
187 if section.len() <= remaining_budget {
188 remaining_budget -= section.len();
189 context.push_str(§ion);
190 }
191 }
192 } else {
193 context.push_str(&format!("### {} (new file)\n\n", file.display()));
194 }
195 }
196 }
197
198 context
199 }
200
201 pub fn search_for_context(&self, query: &str, max_results: usize) -> String {
204 let hits = self.search(query, max_results);
205
206 if hits.is_empty() {
207 return String::new();
208 }
209
210 let mut context = format!("## Related Code (search: '{}')\n\n", query);
211
212 for hit in &hits {
213 context.push_str(&format!(
214 "- **{}:{}**: `{}`\n",
215 hit.file.display(),
216 hit.line,
217 hit.content.trim()
218 ));
219 }
220 context.push('\n');
221
222 context
223 }
224
225 pub fn build_restriction_map(
235 &self,
236 node: &perspt_core::types::SRBNNode,
237 manifest: &perspt_core::types::OwnershipManifest,
238 ) -> perspt_core::types::RestrictionMap {
239 let mut map = perspt_core::types::RestrictionMap::for_node(node.node_id.clone());
240
241 let owned = manifest.files_owned_by(&node.node_id);
243 map.owned_files = owned.iter().map(|s| s.to_string()).collect();
244
245 for target in &node.output_targets {
247 let path_str = target.to_string_lossy().to_string();
248 if !map.owned_files.contains(&path_str) {
249 map.owned_files.push(path_str);
250 }
251 }
252
253 for ctx_file in &node.context_files {
255 map.sealed_interfaces
256 .push(ctx_file.to_string_lossy().to_string());
257 }
258
259 map.budget = perspt_core::types::ContextBudget {
261 byte_limit: self.max_context_bytes,
262 file_count_limit: 20,
263 };
264
265 map
266 }
267
268 pub fn assemble_context_package(
273 &self,
274 node: &perspt_core::types::SRBNNode,
275 restriction_map: &perspt_core::types::RestrictionMap,
276 ) -> perspt_core::types::ContextPackage {
277 let mut package = perspt_core::types::ContextPackage::new(node.node_id.clone());
278 package.restriction_map = restriction_map.clone();
279
280 for file_path in &restriction_map.owned_files {
282 let full_path = self.working_dir.join(file_path);
283 if full_path.exists() {
284 if let Ok(content) = self.read_file_truncated(&full_path) {
285 if !package.add_file(file_path, content) {
286 log::warn!(
287 "Budget exceeded adding owned file '{}' for node '{}'",
288 file_path,
289 node.node_id
290 );
291 break;
292 }
293 }
294 }
295 }
296
297 for iface_path in &restriction_map.sealed_interfaces {
299 let full_path = self.working_dir.join(iface_path);
300 if full_path.exists() {
301 if let Ok(content) = self.read_file_truncated(&full_path) {
303 if !package.add_file(iface_path, content) {
304 if let Ok(raw) = std::fs::read(&full_path) {
306 let digest = perspt_core::types::StructuralDigest::from_content(
307 &node.node_id,
308 iface_path,
309 perspt_core::types::ArtifactKind::InterfaceSeal,
310 &raw,
311 );
312 package.add_structural_digest(digest);
313 }
314 }
315 }
316 }
317 }
318
319 for digest in &restriction_map.structural_digests {
321 package.add_structural_digest(digest.clone());
322 }
323
324 for summary in &restriction_map.summary_digests {
326 package.add_summary_digest(summary.clone());
327 }
328
329 package
330 }
331
332 pub fn compute_structural_digest(
334 &self,
335 path: &str,
336 artifact_kind: perspt_core::types::ArtifactKind,
337 source_node_id: &str,
338 ) -> Result<perspt_core::types::StructuralDigest> {
339 let full_path = self.working_dir.join(path);
340 let content = std::fs::read(&full_path)?;
341 Ok(perspt_core::types::StructuralDigest::from_content(
342 source_node_id,
343 path,
344 artifact_kind,
345 &content,
346 ))
347 }
348
349 pub fn format_context_package(&self, package: &perspt_core::types::ContextPackage) -> String {
351 let mut context = String::new();
352
353 if !package.included_files.is_empty() {
355 context.push_str("## Context Files\n\n");
356 for (path, content) in &package.included_files {
357 context.push_str(&format!("### {}\n```\n{}\n```\n\n", path, content));
358 }
359 }
360
361 if !package.structural_digests.is_empty() {
363 context.push_str("## Structural Dependencies (digests)\n\n");
364 for digest in &package.structural_digests {
365 context.push_str(&format!(
366 "- {} ({}) from node '{}' [hash: {:02x}{:02x}..]\n",
367 digest.source_path,
368 digest.artifact_kind,
369 digest.source_node_id,
370 digest.hash[0],
371 digest.hash[1],
372 ));
373 }
374 context.push('\n');
375 }
376
377 if !package.summary_digests.is_empty() {
379 context.push_str("## Advisory Summaries\n\n");
380 for summary in &package.summary_digests {
381 context.push_str(&format!(
382 "### {} (from {})\n{}\n\n",
383 summary.digest_id, summary.source_node_id, summary.summary_text
384 ));
385 }
386 }
387
388 if package.budget_exceeded {
389 context.push_str(
390 "\n> Note: Context budget was exceeded. Some files replaced with structural digests.\n",
391 );
392 }
393
394 context
395 }
396
/// Returns `true` when the extension of `path`, compared case-insensitively,
/// is on the list of known binary formats (images, office documents,
/// archives, executables/libraries, object/bytecode files, databases).
///
/// Paths without an extension, or whose extension is not valid UTF-8, are
/// reported as non-binary.
fn is_binary_extension(path: &Path) -> bool {
    const BINARY_EXTENSIONS: &[&str] = &[
        // images
        "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp",
        // documents
        "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
        // archives
        "zip", "tar", "gz", "bz2", "7z", "rar",
        // executables and libraries
        "exe", "dll", "so", "dylib", "a", "wasm",
        // object files and bytecode
        "o", "obj", "pyc", "pyo", "class",
        // databases
        "db", "sqlite", "sqlite3",
    ];

    path.extension()
        .and_then(|ext| ext.to_str())
        .map(|ext| ext.to_lowercase())
        .map_or(false, |ext| BINARY_EXTENSIONS.contains(&ext.as_str()))
}
440
441 pub fn validate_provenance_record(
447 &self,
448 record: &perspt_store::ContextProvenanceRecord,
449 ) -> Vec<String> {
450 let mut missing = Vec::new();
451
452 if let Ok(entries) = serde_json::from_str::<Vec<String>>(&record.structural_hashes) {
455 for entry in &entries {
456 let parts: Vec<&str> = entry.splitn(4, ':').collect();
459 if parts.len() >= 3 {
460 let source_path = parts[1];
463 let full_path = self.working_dir.join(source_path);
464 if !full_path.exists() {
465 missing.push(source_path.to_string());
466 }
467 }
468 }
469 }
470
471 missing
472 }
473
474 pub fn get_project_summary(&self) -> String {
484 let registry = perspt_core::plugin::PluginRegistry::new();
485 let detected = registry.detect_all(&self.working_dir);
486
487 if detected.is_empty() {
488 return String::new();
489 }
490
491 let mut summary = String::from("## Existing Project Summary\n\n");
492
493 for plugin in &detected {
494 summary.push_str(&format!("**Language/Plugin:** {}\n", plugin.name()));
495 }
496 summary.push('\n');
497
498 let manifest_candidates = [
500 "Cargo.toml",
501 "pyproject.toml",
502 "setup.py",
503 "requirements.txt",
504 "package.json",
505 "uv.lock",
506 "Cargo.lock",
507 "poetry.lock",
508 ];
509 let mut found_manifests = Vec::new();
510 for candidate in &manifest_candidates {
511 if self.working_dir.join(candidate).exists() {
512 found_manifests.push(*candidate);
513 }
514 }
515 if !found_manifests.is_empty() {
516 summary.push_str(&format!(
517 "**Dependency manifests:** {}\n",
518 found_manifests.join(", ")
519 ));
520 }
521
522 let entry_candidates = [
524 "src/main.rs",
525 "src/lib.rs",
526 "src/main.py",
527 "main.py",
528 "app.py",
529 "__main__.py",
530 "src/index.ts",
531 "src/index.js",
532 "index.ts",
533 "index.js",
534 ];
535 let mut found_entries = Vec::new();
536 for candidate in &entry_candidates {
537 if self.working_dir.join(candidate).exists() {
538 found_entries.push(*candidate);
539 }
540 }
541 if !found_entries.is_empty() {
542 summary.push_str(&format!("**Entry points:** {}\n", found_entries.join(", ")));
543 }
544
545 let test_candidates = ["tests/", "test/", "src/tests/", "tests.py", "test_*.py"];
547 let mut found_tests = Vec::new();
548 for candidate in &test_candidates {
549 if self.working_dir.join(candidate).exists() {
550 found_tests.push(*candidate);
551 }
552 }
553 if !found_tests.is_empty() {
554 summary.push_str(&format!("**Test locations:** {}\n", found_tests.join(", ")));
555 }
556
557 for manifest in &found_manifests {
559 if let Ok(content) = self.read_file_truncated(Path::new(manifest)) {
560 let truncated = if content.len() > 2048 {
562 format!("{}...\n[truncated]", &content[..2048])
563 } else {
564 content
565 };
566 summary.push_str(&format!("\n### {}\n```\n{}\n```\n", manifest, truncated));
567 }
568 }
569
570 summary
571 }
572
573 pub fn gather_architect_evidence(&self) -> String {
579 let mut sections: Vec<String> = Vec::new();
580
581 let api_hits = self.search(r"pub\s+(fn|struct|trait|enum|type|mod)\b", 30);
583 if !api_hits.is_empty() {
584 let mut lines: Vec<String> = Vec::new();
585 for hit in &api_hits {
586 let rel = hit
587 .file
588 .strip_prefix(&self.working_dir)
589 .unwrap_or(&hit.file);
590 lines.push(format!(
591 "- `{}` L{}: {}",
592 rel.display(),
593 hit.line,
594 hit.content.trim()
595 ));
596 }
597 sections.push(format!(
598 "### API Seams (public symbols)\n{}",
599 lines.join("\n")
600 ));
601 }
602
603 let mod_hits = self.search(r"^pub\s+mod\s+\w+", 20);
605 if !mod_hits.is_empty() {
606 let mut lines: Vec<String> = Vec::new();
607 for hit in &mod_hits {
608 let rel = hit
609 .file
610 .strip_prefix(&self.working_dir)
611 .unwrap_or(&hit.file);
612 lines.push(format!(
613 "- `{}` L{}: {}",
614 rel.display(),
615 hit.line,
616 hit.content.trim()
617 ));
618 }
619 sections.push(format!("### Module Boundaries\n{}", lines.join("\n")));
620 }
621
622 let test_hits = self.search(r"#\[test\]|#\[cfg\(test\)\]|def test_|class Test", 20);
624 if !test_hits.is_empty() {
625 let mut test_files: Vec<String> = Vec::new();
626 let mut seen = std::collections::HashSet::new();
627 for hit in &test_hits {
628 let rel = hit
629 .file
630 .strip_prefix(&self.working_dir)
631 .unwrap_or(&hit.file);
632 let key = rel.display().to_string();
633 if seen.insert(key.clone()) {
634 test_files.push(format!("- `{}`", key));
635 }
636 }
637 sections.push(format!(
638 "### Test Layout\nFiles containing tests:\n{}",
639 test_files.join("\n")
640 ));
641 }
642
643 let import_hits = self.search(r"^use |^from \w+ import|^import |require\(", 40);
645 if !import_hits.is_empty() {
646 let mut counts: std::collections::HashMap<String, usize> =
647 std::collections::HashMap::new();
648 for hit in &import_hits {
649 let rel = hit
650 .file
651 .strip_prefix(&self.working_dir)
652 .unwrap_or(&hit.file);
653 *counts.entry(rel.display().to_string()).or_insert(0) += 1;
654 }
655 let mut sorted: Vec<_> = counts.into_iter().collect();
656 sorted.sort_by_key(|b| std::cmp::Reverse(b.1));
657 let top: Vec<String> = sorted
658 .iter()
659 .take(10)
660 .map(|(f, c)| format!("- `{}`: {} import statements", f, c))
661 .collect();
662 sections.push(format!(
663 "### Dependency Hotspots (files with most imports)\n{}",
664 top.join("\n")
665 ));
666 }
667
668 if sections.is_empty() {
669 String::new()
670 } else {
671 format!("## Architect Evidence\n\n{}\n", sections.join("\n\n"))
672 }
673 }
674}
675
676#[cfg(test)]
677mod tests {
678 use super::*;
679 use std::fs;
680 use tempfile::tempdir;
681
// A pattern present in exactly one line of one file yields exactly one hit.
#[test]
fn test_search_finds_pattern() {
    let workspace = tempdir().unwrap();
    fs::write(
        workspace.path().join("test.py"),
        "def hello_world():\n print('Hello')\n",
    )
    .unwrap();

    let found = ContextRetriever::new(workspace.path().to_path_buf()).search("hello_world", 10);

    assert_eq!(found.len(), 1);
    assert!(found[0].content.contains("def hello_world"));
}
694
// A file far larger than the per-file limit comes back shortened and marked.
#[test]
fn test_read_file_truncated() {
    let workspace = tempdir().unwrap();
    let large_file = workspace.path().join("large.txt");
    fs::write(&large_file, "line\n".repeat(10000)).unwrap();

    let retriever =
        ContextRetriever::new(workspace.path().to_path_buf()).with_max_file_bytes(1000);
    let text = retriever.read_file_truncated(&large_file).unwrap();

    assert!(text.contains("truncated"));
    assert!(text.len() < 2000);
}
708
// Owned files come from both the manifest and the node's output targets;
// context files become sealed interfaces.
#[test]
fn test_build_restriction_map() {
    use perspt_core::types::{ModelTier, NodeClass, OwnershipManifest, SRBNNode};

    let workspace = tempdir().unwrap();
    let retriever = ContextRetriever::new(workspace.path().to_path_buf());

    let mut node = SRBNNode::new(
        "node_1".to_string(),
        "test goal".to_string(),
        ModelTier::Actuator,
    );
    node.output_targets = vec![PathBuf::from("src/main.rs")];
    node.context_files = vec![PathBuf::from("src/lib.rs")];

    let mut manifest = OwnershipManifest::new();
    manifest.assign("src/main.rs", "node_1", "rust", NodeClass::Implementation);
    manifest.assign("src/utils.rs", "node_1", "rust", NodeClass::Implementation);

    let restrictions = retriever.build_restriction_map(&node, &manifest);

    assert_eq!(restrictions.node_id, "node_1");
    for expected in ["src/main.rs", "src/utils.rs"] {
        assert!(restrictions.owned_files.contains(&expected.to_string()));
    }
    assert_eq!(
        restrictions.sealed_interfaces,
        vec!["src/lib.rs".to_string()]
    );
}
749
// An owned file that fits in the budget is included with its bytes counted.
#[test]
fn test_assemble_context_package_with_files() {
    use perspt_core::types::{ModelTier, RestrictionMap, SRBNNode};

    let workspace = tempdir().unwrap();
    let src_dir = workspace.path().join("src");
    fs::create_dir_all(&src_dir).unwrap();
    fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();

    let retriever = ContextRetriever::new(workspace.path().to_path_buf());
    let node = SRBNNode::new(
        "node_1".to_string(),
        "test goal".to_string(),
        ModelTier::Actuator,
    );

    let mut restrictions = RestrictionMap::for_node("node_1".to_string());
    restrictions.owned_files.push("src/main.rs".to_string());
    restrictions.budget.byte_limit = 10 * 1024;

    let package = retriever.assemble_context_package(&node, &restrictions);

    assert_eq!(package.node_id, "node_1");
    assert!(package.included_files.contains_key("src/main.rs"));
    assert!(!package.budget_exceeded);
    assert!(package.total_bytes > 0);
}
777
// A 500-byte owned file against a 100-byte budget flags the overrun.
#[test]
fn test_assemble_context_package_budget_exceeded() {
    use perspt_core::types::{ModelTier, RestrictionMap, SRBNNode};

    let workspace = tempdir().unwrap();
    let src_dir = workspace.path().join("src");
    fs::create_dir_all(&src_dir).unwrap();
    fs::write(src_dir.join("big.rs"), "x".repeat(500)).unwrap();

    let retriever = ContextRetriever::new(workspace.path().to_path_buf());
    let node = SRBNNode::new(
        "node_1".to_string(),
        "test goal".to_string(),
        ModelTier::Actuator,
    );

    let mut restrictions = RestrictionMap::for_node("node_1".to_string());
    restrictions.owned_files.push("src/big.rs".to_string());
    restrictions.budget.byte_limit = 100;

    let package = retriever.assemble_context_package(&node, &restrictions);
    assert!(package.budget_exceeded);
}
801
// A freshly created package renders to nothing.
#[test]
fn test_format_context_package_empty() {
    let empty_package = perspt_core::types::ContextPackage::new("node_1".to_string());
    let rendered =
        ContextRetriever::new(PathBuf::from(".")).format_context_package(&empty_package);

    assert!(rendered.is_empty());
}
810
// An included file shows up under the "Context Files" heading with its
// path and content.
#[test]
fn test_format_context_package_with_files() {
    let mut package = perspt_core::types::ContextPackage::new("node_1".to_string());
    package.add_file("src/main.rs", "fn main() {}".to_string());

    let rendered = ContextRetriever::new(PathBuf::from(".")).format_context_package(&package);

    for expected in ["## Context Files", "src/main.rs", "fn main() {}"] {
        assert!(rendered.contains(expected));
    }
}
822
// The digest records its provenance and carries a non-zero hash.
#[test]
fn test_compute_structural_digest() {
    let workspace = tempdir().unwrap();
    fs::write(workspace.path().join("test.rs"), "fn test() {}").unwrap();

    let retriever = ContextRetriever::new(workspace.path().to_path_buf());
    let computed = retriever.compute_structural_digest(
        "test.rs",
        perspt_core::types::ArtifactKind::Signature,
        "node_1",
    );
    let digest = computed.unwrap();

    assert_eq!(digest.source_node_id, "node_1");
    assert_eq!(digest.source_path, "test.rs");
    assert_ne!(digest.hash, [0u8; 32]);
}
841
// A small Rust project layout produces an evidence report that lists at
// least one public symbol.
#[test]
fn test_gather_architect_evidence_rust_project() {
    let workspace = tempdir().unwrap();
    let src_dir = workspace.path().join("src");
    fs::create_dir_all(&src_dir).unwrap();

    let fixtures = [
        (
            "lib.rs",
            "pub mod math;\npub mod utils;\n\nuse crate::math::add;\n",
        ),
        (
            "math.rs",
            "pub fn add(a: i32, b: i32) -> i32 { a + b }\n\n#[cfg(test)]\nmod tests {\n #[test]\n fn test_add() { assert_eq!(super::add(1, 2), 3); }\n}\n",
        ),
        ("utils.rs", "pub struct Config { pub name: String }\n"),
    ];
    for (file_name, source) in fixtures {
        fs::write(src_dir.join(file_name), source).unwrap();
    }

    let evidence =
        ContextRetriever::new(workspace.path().to_path_buf()).gather_architect_evidence();

    assert!(
        evidence.contains("Architect Evidence"),
        "Should produce an evidence section"
    );
    assert!(evidence.contains("API Seams"), "Should find public symbols");
    assert!(
        evidence.contains("pub fn add") || evidence.contains("pub mod math"),
        "Should list at least one public API"
    );
}
876
// With nothing to search, no evidence report is produced.
#[test]
fn test_gather_architect_evidence_empty_dir() {
    let workspace = tempdir().unwrap();
    let evidence =
        ContextRetriever::new(workspace.path().to_path_buf()).gather_architect_evidence();

    assert!(
        evidence.is_empty(),
        "Empty projects should produce no evidence"
    );
}
887}