AirLibrary/Indexing/Scan/
ScanFile.rs1use std::{
72 path::PathBuf,
73 time::{Duration, Instant},
74};
75
76use crate::dev_log;
78use crate::{
79 AirError,
80 Configuration::IndexingConfig,
81 Indexing::{
82 Process::{
83 ExtractSymbols::ExtractSymbols,
84 ProcessContent::{DetectEncoding, DetectLanguage, DetectMimeType},
85 },
86 State::CreateState::{FileMetadata, SymbolInfo},
87 },
88 Result,
89};
90
91pub async fn IndexFileInternal(
103 file_path:&PathBuf,
104
105 config:&IndexingConfig,
106
107 _patterns:&[String],
108) -> Result<(FileMetadata, Vec<SymbolInfo>)> {
109 let start_time = Instant::now();
110
111 let metadata = std::fs::metadata(file_path)
113 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
114
115 let modified = metadata
117 .modified()
118 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
119
120 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
121
122 let file_size = metadata.len();
124
125 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
126 return Err(AirError::FileSystem(format!(
127 "File size {} exceeds limit {} MB",
128 file_size, config.MaxFileSizeMb
129 )));
130 }
131
132 let content = tokio::time::timeout(Duration::from_secs(30), tokio::fs::read(file_path))
134 .await
135 .map_err(|_| AirError::FileSystem(format!("Timeout reading file: {} (30s limit)", file_path.display())))?
136 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
137
138 let is_symlink = std::fs::symlink_metadata(file_path)
140 .map(|m| m.file_type().is_symlink())
141 .unwrap_or(false);
142
143 let checksum = CalculateChecksum(&content);
145
146 let encoding = DetectEncoding(&content);
148
149 let mime_type = DetectMimeType(file_path, &content);
151
152 let language = DetectLanguage(file_path);
154
155 let line_count = if mime_type.starts_with("text/") {
157 Some(content.iter().filter(|&&b| b == b'\n').count() as u32 + 1)
158 } else {
159 None
160 };
161
162 let symbols = if let Some(lang) = &language {
164 ExtractSymbols(file_path, &content, lang).await?
165 } else {
166 Vec::new()
167 };
168
169 let permissions = GetPermissionsString(&metadata);
170
171 let elapsed = start_time.elapsed();
172
173 dev_log!(
174 "indexing",
175 "indexed {} in {}ms ({} symbols)",
176 file_path.display(),
177 elapsed.as_millis(),
178 symbols.len()
179 );
180
181 Ok((
182 FileMetadata {
183 path:file_path.clone(),
184 size:file_size,
185 modified:modified_time,
186 mime_type,
187 language,
188 line_count,
189 checksum,
190 is_symlink,
191 permissions,
192 encoding,
193 indexed_at:chrono::Utc::now(),
194 symbol_count:symbols.len() as u32,
195 },
196 symbols,
197 ))
198}
199
200pub async fn ValidateFileAccess(file_path:&PathBuf) -> bool {
202 tokio::task::spawn_blocking({
203 let file_path = file_path.to_path_buf();
204
205 move || {
206 let can_access = std::fs::metadata(&file_path).is_ok();
208
209 if can_access {
210 std::fs::File::open(&file_path).is_ok()
212 } else {
213 false
214 }
215 }
216 })
217 .await
218 .unwrap_or(false)
219}
220
221pub fn CalculateChecksum(content:&[u8]) -> String {
223 use sha2::{Digest, Sha256};
228
229 let mut hasher = Sha256::new();
230
231 hasher.update(content);
232
233 hex::encode(hasher.finalize())
234}
235
/// Renders the owner/group/other permission bits of `metadata` as a
/// nine-character `rwxrwxrwx`-style string, using `-` for every unset bit.
#[cfg(unix)]
pub fn GetPermissionsString(metadata:&std::fs::Metadata) -> String {
    use std::os::unix::fs::PermissionsExt;

    // Mode masks paired with their symbol, in output order: owner, group, other.
    const FLAGS:[(u32, char); 9] = [
        (0o400, 'r'),
        (0o200, 'w'),
        (0o100, 'x'),
        (0o040, 'r'),
        (0o020, 'w'),
        (0o010, 'x'),
        (0o004, 'r'),
        (0o002, 'w'),
        (0o001, 'x'),
    ];

    let mode_bits = metadata.permissions().mode();

    FLAGS
        .iter()
        .map(|&(mask, symbol)| if mode_bits & mask != 0 { symbol } else { '-' })
        .collect()
}
270
/// Fallback for non-Unix targets, where no mode bits are read from the
/// metadata.
///
/// Returns nine dashes so the placeholder has the same width as the
/// `rwxrwxrwx` string produced by the Unix implementation.
#[cfg(not(unix))]
pub fn GetPermissionsString(_metadata:&std::fs::Metadata) -> String { "---------".to_string() }
274
275pub async fn ScanFileMetadata(file_path:&PathBuf) -> Result<FileMetadata> {
277 let metadata = std::fs::metadata(file_path)
278 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
279
280 let modified = metadata
281 .modified()
282 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
283
284 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
285
286 Ok(FileMetadata {
287 path:file_path.clone(),
288 size:metadata.len(),
289 modified:modified_time,
290 mime_type:"application/octet-stream".to_string(),
291 language:None,
292 line_count:None,
293 checksum:String::new(),
294 is_symlink:metadata.file_type().is_symlink(),
295 permissions:GetPermissionsString(&metadata),
296 encoding:None,
297 indexed_at:chrono::Utc::now(),
298 symbol_count:0,
299 })
300}
301
302pub fn FileModifiedSince(file_path:&PathBuf, last_indexed:chrono::DateTime<chrono::Utc>) -> Result<bool> {
304 let metadata = std::fs::metadata(file_path)
305 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
306
307 let modified = metadata
308 .modified()
309 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
310
311 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
312
313 Ok(modified_time > last_indexed)
314}
315
316pub async fn GetFileSize(file_path:&PathBuf) -> Result<u64> {
318 tokio::task::spawn_blocking({
319 let file_path = file_path.to_path_buf();
320
321 move || {
322 let metadata = std::fs::metadata(&file_path)
323 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
324
325 Ok(metadata.len())
326 }
327 })
328 .await?
329}
330
331pub fn IsTextFile(metadata:&FileMetadata) -> bool {
333 metadata.mime_type.starts_with("text/")
334 || metadata.mime_type.contains("json")
335 || metadata.mime_type.contains("xml")
336 || metadata.mime_type.contains("yaml")
337 || metadata.mime_type.contains("toml")
338 || metadata.language.is_some()
339}
340
341pub fn IsBinaryFile(metadata:&FileMetadata) -> bool {
343 !IsTextFile(metadata)
344 || metadata.mime_type == "application/octet-stream"
345 || metadata.mime_type == "application/zip"
346 || metadata.mime_type == "application/x-tar"
347 || metadata.mime_type == "application/x-gzip"
348 || metadata.mime_type == "application/x-bzip2"
349}