1pub mod State;
67
68pub mod Scan;
69
70pub mod Process;
71
72pub mod Language;
73
74pub mod Store;
75
76pub mod Watch;
77
78pub mod Background;
79
80use std::{collections::HashMap, path::PathBuf, sync::Arc};
82
83use tokio::sync::{Mutex, RwLock};
84
85use crate::{
86 AirError,
87 ApplicationState::ApplicationState,
88 Configuration::ConfigurationManager,
89 Indexing::{
90 Scan::{
91 ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
92 ScanFile::IndexFileInternal,
93 },
94 State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
95 Store::{
96 QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
97 StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
98 UpdateIndex::UpdateFileContent,
99 },
100 },
101 Result,
102 dev_log,
103};
104use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
106
/// Upper bound on concurrent indexing work.
///
/// Used as the permit count of `FileIndexer::indexing_semaphore` and passed
/// as the last argument of `ScanDirectoriesParallel` (presumably its
/// parallelism limit as well — confirm against that function).
const MAX_PARALLEL_INDEXING:usize = 10;
109
/// Summary returned by `FileIndexer::IndexDirectory` for one indexing run.
#[derive(Debug, Clone)]
pub struct IndexResult {
	/// Number of files whose indexing task completed successfully.
	pub files_indexed:u32,

	/// Sum of the `size` recorded in the metadata of every successfully
	/// indexed file (logged as bytes).
	pub total_size:u64,

	/// Wall-clock time of the whole run, in seconds.
	pub duration_seconds:f64,

	/// Total number of symbols returned for the successfully indexed files.
	pub symbols_extracted:u32,

	/// Number of files whose indexing task returned an error or could not be
	/// joined.
	pub files_with_errors:u32,
}
128
/// Aggregate figures about the current index, as produced by
/// `FileIndexer::GetIndexStatistics`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct IndexStatistics {
	/// Number of files present in the index.
	pub file_count:u32,

	/// Sum of the recorded `size` of every indexed file.
	pub total_size:u64,

	/// Sum of the recorded `symbol_count` of every indexed file.
	pub total_symbols:u32,

	/// Number of indexed files per language; files without a detected
	/// language are not counted.
	pub language_counts:HashMap<String, u32>,

	/// Copy of the index's `last_updated` timestamp.
	pub last_updated:chrono::DateTime<chrono::Utc>,

	/// Copy of the index's `index_version` string.
	pub index_version:String,
}
144
/// Owns the in-memory file index and the resources needed to build, query and
/// persist it.
///
/// All shared state is held behind `Arc`, so cloning a `FileIndexer` yields a
/// handle to the same index rather than a copy of it.
pub struct FileIndexer {
	/// Shared application state; source of the indexing configuration and
	/// target of service-status updates.
	AppState:Arc<ApplicationState>,

	/// The in-memory index, guarded by an async read/write lock.
	file_index:Arc<RwLock<FileIndex>>,

	/// Directory the index is loaded from and saved to.
	index_directory:PathBuf,

	/// Slot for a filesystem watcher; `new` initializes it to `None`.
	/// NOTE(review): nothing in the visible part of this file populates it —
	/// presumably the `Watch` module does; confirm.
	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,

	/// Limits how many file-indexing tasks run at once
	/// (`MAX_PARALLEL_INDEXING` permits).
	indexing_semaphore:Arc<tokio::sync::Semaphore>,

	/// Starts as `false` and is reset to `false` by `recover_from_corruption`.
	/// NOTE(review): the code that sets it to `true` is not visible here.
	corruption_detected:Arc<Mutex<bool>>,
}
173
174impl FileIndexer {
175 pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
183 let config = &AppState.Configuration.Indexing;
184
185 let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
187
188 EnsureIndexDirectory(&index_directory).await?;
190
191 let file_index = LoadOrCreateIndex(&index_directory).await?;
193
194 let indexer = Self {
195 AppState:AppState.clone(),
196
197 file_index:Arc::new(RwLock::new(file_index)),
198
199 index_directory:index_directory.clone(),
200
201 file_watcher:Arc::new(Mutex::new(None)),
202
203 indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
204
205 corruption_detected:Arc::new(Mutex::new(false)),
206 };
207
208 indexer.VerifyIndexIntegrity().await?;
210
211 indexer
213 .AppState
214 .UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
215 .await
216 .map_err(|e| AirError::Internal(e.to_string()))?;
217
218 dev_log!(
219 "indexing",
220 "[FileIndexer] Initialized with index directory: {}",
221 index_directory.display()
222 );
223
224 Ok(indexer)
225 }
226
227 fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
229 let expanded = ConfigurationManager::ExpandPath(path)?;
230
231 let path_str = expanded.to_string_lossy();
233
234 if path_str.contains("..") {
235 return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
236 }
237
238 Ok(expanded)
239 }
240
241 async fn VerifyIndexIntegrity(&self) -> Result<()> {
243 let index = self.file_index.read().await;
244
245 ValidateIndexConsistency(&index)?;
247
248 let mut missing_files = 0;
250
251 for file_path in index.files.keys() {
252 if !file_path.exists() {
253 missing_files += 1;
254 }
255 }
256
257 if missing_files > 0 {
258 dev_log!("indexing", "warn: [FileIndexer] Found {} missing files in index", missing_files);
259 }
260
261 dev_log!("indexing", "[FileIndexer] Index integrity verified successfully");
262
263 Ok(())
264 }
265
266 pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
268 let start_time = std::time::Instant::now();
269
270 dev_log!("indexing", "[FileIndexer] Starting directory index: {}", path);
271
272 let config = &self.AppState.Configuration.Indexing;
273
274 let (files_to_index, _scan_result) =
276 ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
277
278 let _index_arc = self.file_index.clone();
281
282 let semaphore = self.indexing_semaphore.clone();
283
284 let config_clone = config.clone();
285
286 let mut index_tasks = Vec::new();
287
288 for file_path in files_to_index {
289 let permit = semaphore.clone().acquire_owned().await.unwrap();
290
291 let config_for_task = config_clone.clone();
292
293 let task = tokio::spawn(async move {
294 let _permit = permit;
295
296 IndexFileInternal(&file_path, &config_for_task, &[]).await
297 });
298
299 index_tasks.push(task);
300 }
301
302 let mut index = self.file_index.write().await;
304
305 let mut indexed_paths = std::collections::HashSet::new();
306
307 let mut files_indexed = 0u32;
308
309 let mut total_size = 0u64;
310
311 let mut symbols_extracted = 0u32;
312
313 let mut files_with_errors = 0u32;
314
315 for task in index_tasks {
316 match task.await {
317 Ok(Ok((metadata, symbols))) => {
318 let file_path = metadata.path.clone();
319
320 index.files.insert(file_path.clone(), metadata.clone());
321
322 indexed_paths.insert(file_path.clone());
323
324 if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
326 dev_log!(
327 "indexing",
328 "warn: [FileIndexer] Failed to index content for {}: {}",
329 file_path.display(),
330 e
331 );
332 }
333
334 index.file_symbols.insert(file_path.clone(), symbols.clone());
336
337 symbols_extracted += symbols.len() as u32;
338
339 for symbol in symbols {
341 index
342 .symbol_index
343 .entry(symbol.name.clone())
344 .or_insert_with(Vec::new)
345 .push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
346 }
347
348 files_indexed += 1;
349
350 total_size += metadata.size;
351 },
352
353 Ok(Err(_)) => {
354 files_with_errors += 1;
355 },
356
357 Err(e) => {
358 dev_log!("indexing", "error: [FileIndexer] Indexing task failed: {}", e);
359
360 files_with_errors += 1;
361 },
362 }
363 }
364
365 ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
367
368 UpdateIndexMetadata(&mut index)?;
370
371 SaveIndex(&self.index_directory, &index).await?;
373
374 let duration = start_time.elapsed().as_secs_f64();
375
376 dev_log!(
377 "indexing",
378 "[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
379 files_indexed,
380 total_size,
381 symbols_extracted,
382 files_with_errors,
383 duration
384 );
385
386 Ok(IndexResult {
387 files_indexed,
388 total_size,
389 duration_seconds:duration,
390 symbols_extracted,
391 files_with_errors,
392 })
393 }
394
	/// Runs `query` against the current index and returns one page of results.
	///
	/// `path` and `language` are forwarded unchanged to `QueryIndexSearch`
	/// (presumably as optional result filters — confirm against that
	/// function). The index read lock is held for the duration of the search.
	pub async fn SearchFiles(
		&self,

		query:SearchQuery,

		path:Option<String>,

		language:Option<String>,
	) -> Result<PaginatedSearchResults> {
		// Shared (read) access is enough: searching does not modify the index.
		let index = self.file_index.read().await;

		QueryIndexSearch(&index, query, path, language).await
	}
409
410 pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
412 let index = self.file_index.read().await;
413
414 let query_lower = query.to_lowercase();
415
416 let mut results = Vec::new();
417
418 for (symbol_name, locations) in &index.symbol_index {
419 if symbol_name.to_lowercase().contains(&query_lower) {
420 for loc in locations.iter().take(max_results as usize) {
421 results.push(loc.symbol.clone());
422
423 if results.len() >= max_results as usize {
424 break;
425 }
426 }
427 }
428 }
429
430 Ok(results)
431 }
432
433 pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
435 let index = self.file_index.read().await;
436
437 Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
438 }
439
440 pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
442 let file_path = Self::ValidateAndExpandPath(&path)?;
443
444 let index = self.file_index.read().await;
445
446 Ok(index.files.get(&file_path).cloned())
447 }
448
449 pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
451 let index = self.file_index.read().await;
452
453 let mut language_counts:HashMap<String, u32> = HashMap::new();
454
455 let total_size = index.files.values().map(|m| m.size).sum();
456
457 let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
458
459 for metadata in index.files.values() {
460 if let Some(lang) = &metadata.language {
461 *language_counts.entry(lang.clone()).or_insert(0) += 1;
462 }
463 }
464
465 Ok(IndexStatistics {
466 file_count:index.files.len() as u32,
467 total_size,
468 total_symbols,
469 language_counts,
470 last_updated:index.last_updated,
471 index_version:index.index_version.clone(),
472 })
473 }
474
475 pub async fn recover_from_corruption(&self) -> Result<()> {
477 dev_log!("indexing", "[FileIndexer] Recovering from corrupted index...");
478
479 BackupCorruptedIndex(&self.index_directory).await?;
481
482 let new_index = CreateNewIndex();
484
485 *self.file_index.write().await = new_index;
486
487 *self.corruption_detected.lock().await = false;
489
490 dev_log!("indexing", "[FileIndexer] Index recovery completed");
491
492 Ok(())
493 }
494}
495
496impl Clone for FileIndexer {
497 fn clone(&self) -> Self {
498 Self {
499 AppState:self.AppState.clone(),
500
501 file_index:self.file_index.clone(),
502
503 index_directory:self.index_directory.clone(),
504
505 file_watcher:self.file_watcher.clone(),
506
507 indexing_semaphore:self.indexing_semaphore.clone(),
508
509 corruption_detected:self.corruption_detected.clone(),
510 }
511 }
512}