AirLibrary/Indexing/Process/
ExtractSymbols.rs

1//! # ExtractSymbols
2//!
3//! ## File: Indexing/Process/ExtractSymbols.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides symbol extraction functionality for the File Indexer service,
8//! extracting classes, functions, and other code constructs for VSCode
9//! Outline View and Go to Symbol features.
10//!
11//! ## Primary Responsibility
12//!
13//! Extract code symbols from file content based on detected language,
14//! including functions, classes, structs, enums, traits, and more.
15//!
16//! ## Secondary Responsibilities
17//!
18//! - Language-specific symbol extraction
19//! - Line and column tracking for symbols
20//! - Symbol kind classification
21//! - Cross-file symbol reference support
22//!
23//! ## Dependencies
24//!
25//! **External Crates:**
26//! - None (uses std library)
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `super::Language` - Language-specific parsers
31//!
32//! ## Dependents
33//!
34//! - `Indexing::Scan::ScanFile` - Symbol extraction during file scan
35//! - `Indexing::mod::FileIndexer` - Symbol search operations
36//!
37//! ## VSCode Pattern Reference
38//!
39//! Inspired by VSCode's symbol extraction in
40//! `src/vs/workbench/services/search/common/`
41//!
42//! ## Security Considerations
43//!
44//! - Line-by-line parsing without eval
45//! - No code execution during extraction
46//! - Safe string handling
47//!
48//! ## Performance Considerations
49//!
50//! - Efficient line-based parsing
51//! - Minimal allocations per file
52//! - Early termination for non-code files
53//!
54//! ## Error Handling Strategy
55//!
56//! Symbol extraction returns empty vectors on parse errors rather than
57//! failures, allowing indexing to continue for other languages.
58//!
59//! ## Thread Safety
60//!
61//! Symbol extraction functions are pure and safe to call from
62//! parallel indexing tasks.
63
64use std::path::PathBuf;
65
66use crate::{
67	Indexing::{
68		Language::{ParseRust::ExtractRustSymbols, ParseTypeScript::ExtractTypeScriptSymbols},
69		State::CreateState::{SymbolInfo, SymbolKind, SymbolLocation},
70	},
71	Result,
72};
73
74/// Extract symbols from code for VSCode Outline View and Go to Symbol
75///
76/// Supports multiple programming languages:
77/// - Rust: struct, impl, fn, mod, enum, trait, type
78/// - TypeScript/JavaScript: class, interface, function, const, let, var
79/// - Python: class, def
80/// - Go: type, func, struct, interface
81pub async fn ExtractSymbols(file_path:&PathBuf, content:&[u8], language:&str) -> Result<Vec<SymbolInfo>> {
82	let content_str = String::from_utf8_lossy(content);
83	let mut symbols = Vec::new();
84
85	match language.to_lowercase().as_str() {
86		"rust" => symbols.extend(ExtractRustSymbols(&content_str, file_path)),
87		"typescript" | "javascript" => symbols.extend(ExtractTypeScriptSymbols(&content_str, file_path)),
88		_ => {},
89	}
90
91	Ok(symbols)
92}
93
94/// Group symbols by kind for organization
95pub fn GroupSymbolsByKind(symbols:&[SymbolInfo]) -> std::collections::HashMap<SymbolKind, Vec<&SymbolInfo>> {
96	let mut grouped = std::collections::HashMap::new();
97
98	for symbol in symbols {
99		grouped.entry(symbol.kind.clone()).or_insert_with(Vec::new).push(symbol);
100	}
101
102	grouped
103}
104
105/// Sort symbols by line number
106pub fn SortSymbolsByLine(symbols:&mut Vec<SymbolInfo>) { symbols.sort_by(|a, b| a.line.cmp(&b.line)); }
107
108/// Filter symbols by name pattern
109pub fn FilterSymbolsByName<'a>(symbols:&'a [SymbolInfo], pattern:&str) -> Vec<&'a SymbolInfo> {
110	let pattern_lower = pattern.to_lowercase();
111	symbols
112		.iter()
113		.filter(|s| s.name.to_lowercase().contains(&pattern_lower))
114		.collect()
115}
116
117/// Get symbols of a specific kind
118pub fn GetSymbolsByKind(symbols:&[SymbolInfo], kind:SymbolKind) -> Vec<&SymbolInfo> {
119	symbols.iter().filter(|s| s.kind == kind).collect()
120}
121
122/// Find symbol at specific line
123pub fn FindSymbolAtLine(symbols:&[SymbolInfo], line:u32) -> Option<&SymbolInfo> {
124	symbols.iter().find(|s| s.line == line)
125}
126
127/// Find symbols in line range
128pub fn FindSymbolsInRange(symbols:&[SymbolInfo], start_line:u32, end_line:u32) -> Vec<&SymbolInfo> {
129	symbols.iter().filter(|s| s.line >= start_line && s.line <= end_line).collect()
130}
131
132/// Create symbol summary statistics
133pub fn GetSymbolStatistics(symbols:&[SymbolInfo]) -> SymbolStatistics {
134	let mut stats = SymbolStatistics { total:symbols.len(), by_kind:std::collections::HashMap::new() };
135
136	for symbol in symbols {
137		*stats.by_kind.entry(symbol.kind.clone()).or_insert(0) += 1;
138	}
139
140	stats
141}
142
143/// Symbol statistics
144#[derive(Debug, Clone)]
145pub struct SymbolStatistics {
146	pub total:usize,
147	pub by_kind:std::collections::HashMap<SymbolKind, usize>,
148}
149
150impl std::fmt::Display for SymbolStatistics {
151	fn fmt(&self, f:&mut std::fmt::Formatter<'_>) -> std::fmt::Result {
152		write!(f, "Total symbols: {}", self.total)?;
153		for (kind, count) in &self.by_kind {
154			write!(f, ", {:?}: {}", kind, count)?;
155		}
156		Ok(())
157	}
158}
159
160/// Validate symbol information
161pub fn ValidateSymbol(symbol:&SymbolInfo) -> bool {
162	!symbol.name.is_empty() && symbol.line > 0 && !symbol.full_path.is_empty()
163}
164
165/// Deduplicate symbols by name and line
166pub fn DeduplicateSymbols(symbols:Vec<SymbolInfo>) -> Vec<SymbolInfo> {
167	let mut seen = std::collections::HashSet::new();
168	symbols.into_iter().filter(|s| seen.insert((s.name.clone(), s.line))).collect()
169}
170
171/// Merge symbol lists from multiple files
172pub fn MergeSymbolLists(symbol_lists:Vec<Vec<SymbolInfo>>) -> Vec<SymbolInfo> {
173	let mut merged = Vec::new();
174	for symbols in symbol_lists {
175		merged.extend(symbols);
176	}
177	DeduplicateSymbols(merged)
178}
179
180/// Deduplicate multiple symbol lists
181pub fn DeduplicateLists(symbol_lists:Vec<Vec<SymbolInfo>>) -> Vec<Vec<SymbolInfo>> {
182	symbol_lists.into_iter().map(|list| DeduplicateSymbols(list)).collect()
183}
184
185/// Create a symbol search index (name -> symbols)
186pub fn CreateSymbolIndex(symbols:&[SymbolInfo]) -> std::collections::HashMap<String, Vec<usize>> {
187	let mut index = std::collections::HashMap::new();
188	for (idx, symbol) in symbols.iter().enumerate() {
189		index.entry(symbol.name.to_lowercase()).or_insert_with(Vec::new).push(idx);
190	}
191	index
192}
193
194/// Find symbols matching multiple criteria
195pub fn FindSymbolsMatching<'a>(
196	symbols:&'a [SymbolInfo],
197	name_pattern:Option<&'a str>,
198	kind:&Option<SymbolKind>,
199	line_range:Option<(u32, u32)>,
200) -> Vec<&'a SymbolInfo> {
201	symbols
202		.iter()
203		.filter(|s| {
204			if let Some(pattern) = name_pattern {
205				if !s.name.to_lowercase().contains(&pattern.to_lowercase()) {
206					return false;
207				}
208			}
209			if let Some(k) = kind {
210				if s.kind != *k {
211					return false;
212				}
213			}
214			if let Some((start, end)) = line_range {
215				if s.line < start || s.line > end {
216					return false;
217				}
218			}
219			true
220		})
221		.collect()
222}