AirLibrary/Indexing/Scan/
ScanFile.rs1use std::{
73 path::PathBuf,
74 time::{Duration, Instant},
75};
76
77use tokio::sync::RwLock;
78
79use crate::{
80 AirError,
81 Configuration::IndexingConfig,
82 Indexing::{
83 Process::{
84 ExtractSymbols::ExtractSymbols,
85 ProcessContent::{DetectEncoding, DetectLanguage, DetectMimeType},
86 },
87 State::CreateState::{FileMetadata, SymbolInfo, SymbolLocation},
88 },
89 Result,
90};
91
92pub async fn IndexFileInternal(
104 file_path:&PathBuf,
105 config:&IndexingConfig,
106 _index_ref:&RwLock<crate::Indexing::State::CreateState::FileIndex>,
107 _patterns:&[String],
108) -> Result<(FileMetadata, Vec<SymbolInfo>)> {
109 let start_time = Instant::now();
110
111 let metadata = std::fs::metadata(file_path)
113 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
114
115 let modified = metadata
117 .modified()
118 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
119
120 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
121
122 let file_size = metadata.len();
124 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
125 return Err(AirError::FileSystem(format!(
126 "File size {} exceeds limit {} MB",
127 file_size, config.MaxFileSizeMb
128 )));
129 }
130
131 let content = tokio::time::timeout(Duration::from_secs(30), tokio::fs::read(file_path))
133 .await
134 .map_err(|_| AirError::FileSystem(format!("Timeout reading file: {} (30s limit)", file_path.display())))?
135 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
136
137 let is_symlink = std::fs::symlink_metadata(file_path)
139 .map(|m| m.file_type().is_symlink())
140 .unwrap_or(false);
141
142 let checksum = CalculateChecksum(&content);
144
145 let encoding = DetectEncoding(&content);
147
148 let mime_type = DetectMimeType(file_path, &content);
150
151 let language = DetectLanguage(file_path);
153
154 let line_count = if mime_type.starts_with("text/") {
156 Some(content.iter().filter(|&&b| b == b'\n').count() as u32 + 1)
157 } else {
158 None
159 };
160
161 let symbols = if let Some(lang) = &language {
163 ExtractSymbols(file_path, &content, lang).await?
164 } else {
165 Vec::new()
166 };
167
168 let permissions = GetPermissionsString(&metadata);
169
170 let elapsed = start_time.elapsed();
171
172 log::trace!(
173 "[ScanFile] Indexed {} in {}ms ({} symbols)",
174 file_path.display(),
175 elapsed.as_millis(),
176 symbols.len()
177 );
178
179 Ok((
180 FileMetadata {
181 path:file_path.clone(),
182 size:file_size,
183 modified:modified_time,
184 mime_type,
185 language,
186 line_count,
187 checksum,
188 is_symlink,
189 permissions,
190 encoding,
191 indexed_at:chrono::Utc::now(),
192 symbol_count:symbols.len() as u32,
193 },
194 symbols,
195 ))
196}
197
198pub async fn ValidateFileAccess(file_path:&PathBuf) -> bool {
200 tokio::task::spawn_blocking({
201 let file_path = file_path.to_path_buf();
202 move || {
203 let can_access = std::fs::metadata(&file_path).is_ok();
205 if can_access {
206 std::fs::File::open(&file_path).is_ok()
208 } else {
209 false
210 }
211 }
212 })
213 .await
214 .unwrap_or(false)
215}
216
217pub fn CalculateChecksum(content:&[u8]) -> String {
219 use sha2::{Digest, Sha256};
220 let mut hasher = Sha256::new();
221 hasher.update(content);
222 format!("{:x}", hasher.finalize())
223}
224
/// Renders the owner, group and other permission bits of `metadata` as a
/// nine-character string in `rwxrwxrwx` layout, with `-` for every unset bit.
#[cfg(unix)]
pub fn GetPermissionsString(metadata:&std::fs::Metadata) -> String {
    use std::os::unix::fs::PermissionsExt;

    // Mode-bit masks paired with their symbol, in output order:
    // owner (r, w, x), group (r, w, x), other (r, w, x).
    const FLAGS:[(u32, char); 9] = [
        (0o400, 'r'),
        (0o200, 'w'),
        (0o100, 'x'),
        (0o040, 'r'),
        (0o020, 'w'),
        (0o010, 'x'),
        (0o004, 'r'),
        (0o002, 'w'),
        (0o001, 'x'),
    ];

    let bits = metadata.permissions().mode();

    FLAGS
        .iter()
        .map(|&(mask, symbol)| if bits & mask != 0 { symbol } else { '-' })
        .collect()
}
247
/// Fallback for platforms without Unix mode bits.
///
/// Returns a placeholder with every position unset. It is nine characters
/// wide so that it has the same `rwxrwxrwx` layout and length as the string
/// produced by the Unix implementation; `_metadata` is not inspected.
#[cfg(not(unix))]
pub fn GetPermissionsString(_metadata:&std::fs::Metadata) -> String { "---------".to_string() }
251
252pub async fn ScanFileMetadata(file_path:&PathBuf) -> Result<FileMetadata> {
254 let metadata = std::fs::metadata(file_path)
255 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
256
257 let modified = metadata
258 .modified()
259 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
260
261 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
262
263 Ok(FileMetadata {
264 path:file_path.clone(),
265 size:metadata.len(),
266 modified:modified_time,
267 mime_type:"application/octet-stream".to_string(),
268 language:None,
269 line_count:None,
270 checksum:String::new(),
271 is_symlink:metadata.file_type().is_symlink(),
272 permissions:GetPermissionsString(&metadata),
273 encoding:None,
274 indexed_at:chrono::Utc::now(),
275 symbol_count:0,
276 })
277}
278
279pub fn FileModifiedSince(file_path:&PathBuf, last_indexed:chrono::DateTime<chrono::Utc>) -> Result<bool> {
281 let metadata = std::fs::metadata(file_path)
282 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
283
284 let modified = metadata
285 .modified()
286 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
287
288 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
289
290 Ok(modified_time > last_indexed)
291}
292
293pub async fn GetFileSize(file_path:&PathBuf) -> Result<u64> {
295 tokio::task::spawn_blocking({
296 let file_path = file_path.to_path_buf();
297 move || {
298 let metadata = std::fs::metadata(&file_path)
299 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
300 Ok(metadata.len())
301 }
302 })
303 .await?
304}
305
306pub fn IsTextFile(metadata:&FileMetadata) -> bool {
308 metadata.mime_type.starts_with("text/")
309 || metadata.mime_type.contains("json")
310 || metadata.mime_type.contains("xml")
311 || metadata.mime_type.contains("yaml")
312 || metadata.mime_type.contains("toml")
313 || metadata.language.is_some()
314}
315
316pub fn IsBinaryFile(metadata:&FileMetadata) -> bool {
318 !IsTextFile(metadata)
319 || metadata.mime_type == "application/octet-stream"
320 || metadata.mime_type == "application/zip"
321 || metadata.mime_type == "application/x-tar"
322 || metadata.mime_type == "application/x-gzip"
323 || metadata.mime_type == "application/x-bzip2"
324}