AirLibrary/Indexing/Scan/
ScanDirectory.rs1use std::{path::Path, sync::Arc};
68
69use tokio::sync::Semaphore;
70
71use crate::{
72 AirError,
73 Configuration::IndexingConfig,
74 Indexing::{Scan::ScanFile::ValidateFileAccess, State::CreateState::FileIndex},
75 Result,
76 dev_log,
77};
78
/// Aggregate counters produced by a directory scan.
///
/// The same shape is used both for a single directory and for the summed
/// totals of several directories scanned together.
#[derive(Debug, Clone)]
pub struct ScanDirectoryResult {
	/// Number of files accepted for indexing.
	pub files_found:u32,

	/// Number of entries that were deliberately left out of the scan.
	pub files_skipped:u32,

	/// Number of failures encountered while scanning.
	pub errors:u32,

	/// Combined size, in bytes, of the accepted files.
	pub total_size:u64,
}
94
95pub async fn ScanDirectory(
105 path:&str,
106
107 patterns:Vec<String>,
108
109 config:&IndexingConfig,
110
111 _max_parallel:usize,
112) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
113 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
114
115 if !directory_path.exists() {
117 return Err(AirError::FileSystem(format!("Directory does not exist: {}", path)));
118 }
119
120 if !directory_path.is_dir() {
121 return Err(AirError::FileSystem(format!("Path is not a directory: {}", path)));
122 }
123
124 CheckDirectoryPermissions(&directory_path).await?;
126
127 let include_patterns = if patterns.is_empty() { config.FileTypes.clone() } else { patterns };
129
130 let walker = ignore::WalkBuilder::new(&directory_path)
132 .max_depth(Some(10)) .hidden(false)
134 .follow_links(false) .build();
136
137 let mut files_to_scan:Vec<std::path::PathBuf> = Vec::new();
138
139 let mut files_found = 0u32;
140
141 let mut files_skipped = 0u32;
142
143 let mut errors = 0u32;
144
145 let mut total_size = 0u64;
146
147 for result in walker {
149 match result {
150 Ok(entry) => {
151 if entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
153 let file_path = entry.path().to_path_buf();
154
155 if entry.path_is_symlink() {
157 dev_log!("indexing", "[ScanDirectory] Skipping symlink: {}", file_path.display());
158
159 files_skipped += 1;
160
161 continue;
162 }
163
164 if let Ok(metadata) = entry.metadata() {
166 let file_size = metadata.len();
167
168 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
169 dev_log!(
170 "indexing",
171 "warn: [ScanDirectory] Skipping oversized file: {} ({} bytes)",
172 file_path.display(),
173 file_size
174 );
175
176 files_skipped += 1;
177
178 continue;
179 }
180
181 if MatchesPatterns(&file_path, &include_patterns) {
183 if ValidateFileAccess(&file_path).await {
185 files_to_scan.push(file_path);
186
187 files_found += 1;
188
189 total_size += file_size;
190 } else {
191 dev_log!(
192 "indexing",
193 "warn: [ScanDirectory] Cannot access file (permission denied): {}",
194 file_path.display()
195 );
196
197 errors += 1;
198 }
199 } else {
200 files_skipped += 1;
201 }
202 } else {
203 errors += 1;
204 }
205 }
206 },
207
208 Err(e) => {
209 dev_log!("indexing", "warn: [ScanDirectory] Error walking directory: {}", e);
210
211 errors += 1;
212 },
213 }
214 }
215
216 dev_log!(
217 "indexing",
218 "[ScanDirectory] Directory scan completed: {} files, {} skipped, {} errors, {} bytes",
219 files_found,
220 files_skipped,
221 errors,
222 total_size
223 );
224
225 Ok((
226 files_to_scan,
227 ScanDirectoryResult { files_found, files_skipped, errors, total_size },
228 ))
229}
230
231pub async fn ScanAndRemoveDeleted(index:&mut FileIndex, directory_path:&Path) -> Result<u32> {
233 let mut paths_to_remove = Vec::new();
234
235 let all_paths:Vec<_> = index.files.keys().cloned().collect();
236
237 for path in all_paths {
238 if !path.exists() && path.starts_with(directory_path) {
239 paths_to_remove.push(path.clone());
240 }
241 }
242
243 let removed_count = paths_to_remove.len();
244
245 for path in paths_to_remove {
246 index.files.remove(&path);
247
248 index.file_symbols.remove(&path);
249
250 for (_, locations) in index.symbol_index.iter_mut() {
252 locations.retain(|loc| loc.file_path != path);
253 }
254
255 for (_, files) in index.content_index.iter_mut() {
257 files.retain(|p| p != &path);
258 }
259 }
260
261 Ok(removed_count as u32)
262}
263
264async fn CheckDirectoryPermissions(path:&Path) -> Result<()> {
266 tokio::task::spawn_blocking({
267 let path = path.to_path_buf();
268
269 move || {
270 std::fs::read_dir(&path)
271 .map_err(|e| AirError::FileSystem(format!("Cannot read directory {}: {}", path.display(), e)))?;
272
273 Ok(())
274 }
275 })
276 .await?
277}
278
279pub fn MatchesPatterns(file_path:&std::path::Path, patterns:&[String]) -> bool {
281 if patterns.is_empty() {
282 return true;
283 }
284
285 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
286
287 for pattern in patterns {
288 if MatchesPattern(&file_name, pattern) {
289 return true;
290 }
291 }
292
293 false
294}
295
/// Tests a single file name against a single pattern.
///
/// Two pattern forms are supported:
/// - `*.<suffix>` matches any name that ends with `.<suffix>`, dot included,
///   so `*.rs` matches `main.rs` but not `cars`;
/// - anything else must equal the file name exactly.
///
/// Matching is case-sensitive.
pub fn MatchesPattern(filename:&str, pattern:&str) -> bool {
	match pattern.strip_prefix('*') {
		// Keep the leading dot in the suffix so the extension separator is
		// part of the comparison.
		Some(suffix) if suffix.starts_with('.') => filename.ends_with(suffix),

		_ => filename == pattern,
	}
}
306
/// Returns the built-in list of names and patterns for commonly excluded
/// paths: dependency and build output directories, version-control metadata,
/// Python caches and virtual environments, editor settings and OS artefacts.
pub fn GetDefaultExcludePatterns() -> Vec<String> {
	const DEFAULT_EXCLUDES:&[&str] = &[
		"node_modules",
		"target",
		".git",
		".svn",
		".hg",
		".bzr",
		"dist",
		"build",
		".next",
		".nuxt",
		"__pycache__",
		"*.pyc",
		".venv",
		"venv",
		"env",
		".env",
		".idea",
		".vscode",
		".DS_Store",
		"Thumbs.db",
	];

	DEFAULT_EXCLUDES.iter().map(|entry| entry.to_string()).collect()
}
332
333pub async fn ScanDirectoriesParallel(
335 directories:Vec<String>,
336
337 patterns:Vec<String>,
338
339 config:&IndexingConfig,
340
341 max_parallel:usize,
342) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
343 let semaphore = Arc::new(Semaphore::new(max_parallel));
344
345 let mut all_files = Vec::new();
346
347 let mut total_result = ScanDirectoryResult { files_found:0, files_skipped:0, errors:0, total_size:0 };
348
349 let mut scan_tasks = Vec::new();
350
351 for directory in directories {
352 let permit = semaphore.clone().acquire_owned().await.unwrap();
353
354 let config_clone = config.clone();
355
356 let patterns_clone = patterns.clone();
357
358 let task = tokio::spawn(async move {
359 let _permit = permit;
360
361 ScanDirectory(&directory, patterns_clone, &config_clone, max_parallel).await
362 });
363
364 scan_tasks.push(task);
365 }
366
367 for task in scan_tasks {
369 match task.await {
370 Ok(Ok((files, result))) => {
371 all_files.extend(files);
372
373 total_result.files_found += result.files_found;
374
375 total_result.files_skipped += result.files_skipped;
376
377 total_result.errors += result.errors;
378
379 total_result.total_size += result.total_size;
380 },
381
382 Ok(Err(e)) => {
383 dev_log!("indexing", "error: [ScanDirectory] Parallel scan failed: {}", e);
384
385 total_result.errors += 1;
386 },
387
388 Err(e) => {
389 dev_log!("indexing", "error: [ScanDirectory] Parallel task panicked: {}", e);
390
391 total_result.errors += 1;
392 },
393 }
394 }
395
396 Ok((all_files, total_result))
397}
398
399pub async fn GetDirectoryStatistics(path:&str, max_depth:Option<usize>) -> Result<DirectoryStatistics> {
401 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
402
403 if !directory_path.exists() || !directory_path.is_dir() {
404 return Err(AirError::FileSystem(format!("Invalid directory: {}", path)));
405 }
406
407 let mut file_count = 0u64;
408
409 let mut total_size = 0u64;
410
411 let mut directory_count = 0u64;
412
413 let mut hidden_count = 0u64;
414
415 let walker = ignore::WalkBuilder::new(&directory_path)
416 .max_depth(max_depth)
417 .hidden(true)
418 .follow_links(false)
419 .build();
420
421 for entry in walker.flatten() {
422 let file_type = entry.file_type().expect("Failed to get file type");
423
424 if file_type.is_file() {
425 file_count += 1;
426
427 if let Ok(metadata) = entry.metadata() {
428 total_size += metadata.len();
429 }
430 } else if file_type.is_dir() {
431 directory_count += 1;
432 }
433
434 if entry.depth() > 0
435 && entry
436 .path()
437 .components()
438 .any(|c| c.as_os_str().to_string_lossy().starts_with('.'))
439 {
440 hidden_count += 1;
441 }
442 }
443
444 Ok(DirectoryStatistics { file_count, directory_count, hidden_count, total_size })
445}
446
/// Summary counts for a directory tree, as returned by
/// `GetDirectoryStatistics`.
#[derive(Debug, Clone)]
pub struct DirectoryStatistics {
	/// Number of regular files visited.
	pub file_count:u64,

	/// Number of directories visited.
	pub directory_count:u64,

	/// Number of visited entries classified as hidden.
	pub hidden_count:u64,

	/// Combined size, in bytes, of the visited files.
	pub total_size:u64,
}