Skip to main content

AirLibrary/Indexing/
mod.rs

1//! # File Indexing and Search Service
2//!
3//! ## File: Indexing/mod.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides comprehensive file indexing, search, and content analysis
8//! capabilities for the Land ecosystem, inspired by and compatible with
9//! Visual Studio Code's search service.
10//!
11//! ## Primary Responsibility
12//!
13//! Facade module for the Indexing service, exposing the public API for
14//! file indexing, search, and symbol extraction operations.
15//!
16//! ## Secondary Responsibilities
17//!
18//! - Re-export public types from submodules
19//! - Provide unified FileIndexer API
20//! - Coordinate between indexing subsystems
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `regex` - Regular expression search patterns
26//! - `serde` - Serialization for index storage
27//! - `tokio` - Async runtime for all operations
28//! - `notify` - File system watching
29//! - `chrono` - Timestamp management
30//!
31//! **Internal Modules:**
32//! - `crate::Result` - Error handling type
33//! - `crate::AirError` - Error types
34//! - `crate::ApplicationState::ApplicationState` - Application state
35//! - `crate::Configuration::ConfigurationManager` - Configuration management
36//!
37//! ## Dependents
38//!
39//! - `Indexing::FileIndexer` - Main indexer implementation
40//! - `Vine::Server::AirVinegRPCService` - gRPC integration
41//!
42//! ## VSCode Integration
43//!
44//! This service integrates with VSCode's search and file service architecture:
45//!
46//! - References: vs/workbench/services/search
47//! - File Service: vs/workbench/services/files
48//!
49//! The indexing system supports VSCode features:
50//! - **Outline View**: Symbol extraction for class/function navigation
51//! - **Go to Symbol**: Cross-file symbol search and lookup
52//! - **Search Integration**: File content and name search with regex support
53//! - **Workspace Search**: Multi-workspace index sharing
54//!
55//! ## FUTURE Enhancements
56//!
57//! - [ ] Implement full ripgrep integration for ultra-fast text search
58//! - [ ] Add project-level search with workspace awareness
59//! - [ ] Implement search query caching
60//! - [ ] Add fuzzy search with typo tolerance
61//! - [ ] Implement search history and recent queries
62//! - [ ] Add search result preview with context
63//! - [ ] Implement parallel indexing for large directories
64
65// Modules - file-based (no inline definitions)
66pub mod State;
67
68pub mod Scan;
69
70pub mod Process;
71
72pub mod Language;
73
74pub mod Store;
75
76pub mod Watch;
77
78pub mod Background;
79
80// Import types and functions needed for the FileIndexer implementation
81use std::{collections::HashMap, path::PathBuf, sync::Arc};
82
83use tokio::sync::{Mutex, RwLock};
84
85use crate::{
86	AirError,
87	ApplicationState::ApplicationState,
88	Configuration::ConfigurationManager,
89	Indexing::{
90		Scan::{
91			ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
92			ScanFile::IndexFileInternal,
93		},
94		State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
95		Store::{
96			QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
97			StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
98			UpdateIndex::UpdateFileContent,
99		},
100	},
101	Result,
102	dev_log,
103};
104// Import types from submodules with explicit full paths
105use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
106
/// Upper bound on concurrently running indexing operations.
///
/// Used as the capacity of the indexing semaphore created in
/// `FileIndexer::new` and as the parallelism argument handed to
/// `ScanDirectoriesParallel`.
const MAX_PARALLEL_INDEXING:usize = 10;
109
/// Summary returned by a single `FileIndexer::IndexDirectory` run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// How many files were indexed without error.
	pub files_indexed:u32,

	/// Combined size, in bytes, of the files counted in `files_indexed`.
	pub total_size:u64,

	/// Wall-clock duration of the run, in seconds.
	pub duration_seconds:f64,

	/// How many symbols were extracted from the indexed files.
	pub symbols_extracted:u32,

	/// How many files failed to index (indexing error or failed task).
	pub files_with_errors:u32,
}
128
129/// Index statistics
130#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
131pub struct IndexStatistics {
132	pub file_count:u32,
133
134	pub total_size:u64,
135
136	pub total_symbols:u32,
137
138	pub language_counts:HashMap<String, u32>,
139
140	pub last_updated:chrono::DateTime<chrono::Utc>,
141
142	pub index_version:String,
143}
144
145/// File indexer implementation with comprehensive search capabilities
146///
147/// This indexer provides:
148/// - Incremental file watching with real-time updates
149/// - Multi-mode search (literal, regex, fuzzy)
150/// - Symbol extraction for VSCode Outline View
151/// - Language detection for syntax highlighting
152/// - Index corruption detection and recovery
153/// - Parallel indexing with resource limits
154pub struct FileIndexer {
155	/// Application state
156	AppState:Arc<ApplicationState>,
157
158	/// File index with metadata and symbols
159	file_index:Arc<RwLock<FileIndex>>,
160
161	/// Index storage directory
162	index_directory:PathBuf,
163
164	/// File watcher for incremental updates
165	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,
166
167	/// Semaphore for limiting parallel indexing operations
168	indexing_semaphore:Arc<tokio::sync::Semaphore>,
169
170	/// Index corruption detection state
171	corruption_detected:Arc<Mutex<bool>>,
172}
173
174impl FileIndexer {
175	/// Create a new file indexer with comprehensive setup
176	///
177	/// Initializes the indexer with:
178	/// - Index directory creation
179	/// - Existing index loading or fresh creation
180	/// - Index corruption detection
181	/// - Service status initialization
182	pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
183		let config = &AppState.Configuration.Indexing;
184
185		// Expand index directory path with validation
186		let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
187
188		// Create index directory if it doesn't exist with error handling
189		EnsureIndexDirectory(&index_directory).await?;
190
191		// Load or create index with corruption detection
192		let file_index = LoadOrCreateIndex(&index_directory).await?;
193
194		let indexer = Self {
195			AppState:AppState.clone(),
196
197			file_index:Arc::new(RwLock::new(file_index)),
198
199			index_directory:index_directory.clone(),
200
201			file_watcher:Arc::new(Mutex::new(None)),
202
203			indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
204
205			corruption_detected:Arc::new(Mutex::new(false)),
206		};
207
208		// Verify index integrity
209		indexer.VerifyIndexIntegrity().await?;
210
211		// Initialize service status
212		indexer
213			.AppState
214			.UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
215			.await
216			.map_err(|e| AirError::Internal(e.to_string()))?;
217
218		dev_log!(
219			"indexing",
220			"[FileIndexer] Initialized with index directory: {}",
221			index_directory.display()
222		);
223
224		Ok(indexer)
225	}
226
227	/// Validate and expand path with traversal protection
228	fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
229		let expanded = ConfigurationManager::ExpandPath(path)?;
230
231		// Prevent path traversal attacks
232		let path_str = expanded.to_string_lossy();
233
234		if path_str.contains("..") {
235			return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
236		}
237
238		Ok(expanded)
239	}
240
241	/// Verify index integrity and detect corruption
242	async fn VerifyIndexIntegrity(&self) -> Result<()> {
243		let index = self.file_index.read().await;
244
245		// Check consistency
246		ValidateIndexConsistency(&index)?;
247
248		// Verify all indexed files exist
249		let mut missing_files = 0;
250
251		for file_path in index.files.keys() {
252			if !file_path.exists() {
253				missing_files += 1;
254			}
255		}
256
257		if missing_files > 0 {
258			dev_log!("indexing", "warn: [FileIndexer] Found {} missing files in index", missing_files);
259		}
260
261		dev_log!("indexing", "[FileIndexer] Index integrity verified successfully");
262
263		Ok(())
264	}
265
266	/// Index a directory with comprehensive validation and parallel processing
267	pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
268		let start_time = std::time::Instant::now();
269
270		dev_log!("indexing", "[FileIndexer] Starting directory index: {}", path);
271
272		let config = &self.AppState.Configuration.Indexing;
273
274		// Scan directory
275		let (files_to_index, _scan_result) =
276			ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
277
278		// Index files in parallel
279		// Variables cloned for use in async task
280		let _index_arc = self.file_index.clone();
281
282		let semaphore = self.indexing_semaphore.clone();
283
284		let config_clone = config.clone();
285
286		let mut index_tasks = Vec::new();
287
288		for file_path in files_to_index {
289			let permit = semaphore.clone().acquire_owned().await.unwrap();
290
291			let config_for_task = config_clone.clone();
292
293			let task = tokio::spawn(async move {
294				let _permit = permit;
295
296				IndexFileInternal(&file_path, &config_for_task, &[]).await
297			});
298
299			index_tasks.push(task);
300		}
301
302		// Collect results
303		let mut index = self.file_index.write().await;
304
305		let mut indexed_paths = std::collections::HashSet::new();
306
307		let mut files_indexed = 0u32;
308
309		let mut total_size = 0u64;
310
311		let mut symbols_extracted = 0u32;
312
313		let mut files_with_errors = 0u32;
314
315		for task in index_tasks {
316			match task.await {
317				Ok(Ok((metadata, symbols))) => {
318					let file_path = metadata.path.clone();
319
320					index.files.insert(file_path.clone(), metadata.clone());
321
322					indexed_paths.insert(file_path.clone());
323
324					// Index content for search
325					if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
326						dev_log!(
327							"indexing",
328							"warn: [FileIndexer] Failed to index content for {}: {}",
329							file_path.display(),
330							e
331						);
332					}
333
334					// Index symbols
335					index.file_symbols.insert(file_path.clone(), symbols.clone());
336
337					symbols_extracted += symbols.len() as u32;
338
339					// Update symbol index
340					for symbol in symbols {
341						index
342							.symbol_index
343							.entry(symbol.name.clone())
344							.or_insert_with(Vec::new)
345							.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
346					}
347
348					files_indexed += 1;
349
350					total_size += metadata.size;
351				},
352
353				Ok(Err(_)) => {
354					files_with_errors += 1;
355				},
356
357				Err(e) => {
358					dev_log!("indexing", "error: [FileIndexer] Indexing task failed: {}", e);
359
360					files_with_errors += 1;
361				},
362			}
363		}
364
365		// Remove files that were indexed before but no longer exist
366		ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
367
368		// Update index metadata
369		UpdateIndexMetadata(&mut index)?;
370
371		// Save index to disk
372		SaveIndex(&self.index_directory, &index).await?;
373
374		let duration = start_time.elapsed().as_secs_f64();
375
376		dev_log!(
377			"indexing",
378			"[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
379			files_indexed,
380			total_size,
381			symbols_extracted,
382			files_with_errors,
383			duration
384		);
385
386		Ok(IndexResult {
387			files_indexed,
388			total_size,
389			duration_seconds:duration,
390			symbols_extracted,
391			files_with_errors,
392		})
393	}
394
395	/// Search files with multiple modes
396	pub async fn SearchFiles(
397		&self,
398
399		query:SearchQuery,
400
401		path:Option<String>,
402
403		language:Option<String>,
404	) -> Result<PaginatedSearchResults> {
405		let index = self.file_index.read().await;
406
407		QueryIndexSearch(&index, query, path, language).await
408	}
409
410	/// Search symbols across all files (for VSCode Go to Symbol)
411	pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
412		let index = self.file_index.read().await;
413
414		let query_lower = query.to_lowercase();
415
416		let mut results = Vec::new();
417
418		for (symbol_name, locations) in &index.symbol_index {
419			if symbol_name.to_lowercase().contains(&query_lower) {
420				for loc in locations.iter().take(max_results as usize) {
421					results.push(loc.symbol.clone());
422
423					if results.len() >= max_results as usize {
424						break;
425					}
426				}
427			}
428		}
429
430		Ok(results)
431	}
432
433	/// Get symbols for a specific file (for VSCode Outline View)
434	pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
435		let index = self.file_index.read().await;
436
437		Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
438	}
439
440	/// Get file information
441	pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
442		let file_path = Self::ValidateAndExpandPath(&path)?;
443
444		let index = self.file_index.read().await;
445
446		Ok(index.files.get(&file_path).cloned())
447	}
448
449	/// Get index statistics
450	pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
451		let index = self.file_index.read().await;
452
453		let mut language_counts:HashMap<String, u32> = HashMap::new();
454
455		let total_size = index.files.values().map(|m| m.size).sum();
456
457		let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
458
459		for metadata in index.files.values() {
460			if let Some(lang) = &metadata.language {
461				*language_counts.entry(lang.clone()).or_insert(0) += 1;
462			}
463		}
464
465		Ok(IndexStatistics {
466			file_count:index.files.len() as u32,
467			total_size,
468			total_symbols,
469			language_counts,
470			last_updated:index.last_updated,
471			index_version:index.index_version.clone(),
472		})
473	}
474
475	/// Recover corrupted index
476	pub async fn recover_from_corruption(&self) -> Result<()> {
477		dev_log!("indexing", "[FileIndexer] Recovering from corrupted index...");
478
479		// Backup corrupted index
480		BackupCorruptedIndex(&self.index_directory).await?;
481
482		// Create new index
483		let new_index = CreateNewIndex();
484
485		*self.file_index.write().await = new_index;
486
487		// Clear corruption flag
488		*self.corruption_detected.lock().await = false;
489
490		dev_log!("indexing", "[FileIndexer] Index recovery completed");
491
492		Ok(())
493	}
494}
495
496impl Clone for FileIndexer {
497	fn clone(&self) -> Self {
498		Self {
499			AppState:self.AppState.clone(),
500
501			file_index:self.file_index.clone(),
502
503			index_directory:self.index_directory.clone(),
504
505			file_watcher:self.file_watcher.clone(),
506
507			indexing_semaphore:self.indexing_semaphore.clone(),
508
509			corruption_detected:self.corruption_detected.clone(),
510		}
511	}
512}