AirLibrary/Indexing/State/
UpdateState.rs

1//! # UpdateState
2//!
3//! ## File: Indexing/State/UpdateState.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides state update operations for the File Indexer service, handling
8//! modification of index structures including adding, removing, and updating
9//! entries in the file index.
10//!
11//! ## Primary Responsibility
12//!
13//! Update file index state by adding/removing files, symbols, and content
14//! entries in a thread-safe manner.
15//!
16//! ## Secondary Responsibilities
17//!
18//! - Remove deleted files from all indexes
19//! - Update symbol index with new symbol locations
20//! - Update content index with new file paths
21//! - Maintain index version and checksum on updates
22//!
23//! ## Dependencies
24//!
25//! **External Crates:**
26//! - `tokio` - Async runtime for update operations
27//!
28//! **Internal Modules:**
29//! - `crate::Result` - Error handling type
30//! - `crate::AirError` - Error types
31//! - `super::CreateState` - State structure definitions
32//!
33//! ## Dependents
34//!
35//! - `Indexing::Scan::ScanDirectory` - Updates index after directory scan
36//! - `Indexing::Scan::ScanFile` - Updates index after file scan
37//! - `Indexing::Store::UpdateIndex` - Incremental index updates
38//! - `Indexing::Watch::WatchFile` - Updates index on file changes
39//!
40//! ## VSCode Pattern Reference
41//!
42//! Inspired by VSCode's index update operations in
43//! `src/vs/workbench/services/search/common/`
44//!
45//! ## Security Considerations
46//!
47//! - Thread-safe updates prevent race conditions
48//! - Path validation before state updates
49//! - Size limits enforced on all update operations
50//!
51//! ## Performance Considerations
52//!
53//! - Incremental updates minimize reindexing
54//! - Batch updates for multiple files
55//! - Efficient hash lookups for O(1) updates
56//!
57//! ## Error Handling Strategy
58//!
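//! Removal operations ignore paths that are not indexed, so they are
//! idempotent. `UpdateFileMetadata` and `UpdateFileSymbols` return
//! `AirError::Internal` when the target file is not in the index, and
//! errors from I/O failures or invalid state transitions are propagated.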
61//!
62//! ## Thread Safety
63//!
64//! All update operations are designed to work within RwLock write
65//! guards and should be called while holding appropriate locks.
66
67use std::path::PathBuf;
68
69use crate::{
70	AirError,
71	ApplicationState::ApplicationState,
72	Configuration::IndexingConfig,
73	Indexing::State::CreateState::{FileIndex, FileMetadata, SymbolInfo, SymbolLocation},
74	Result,
75};
76
77/// Add a file to the index with its metadata and symbols
78pub fn AddFileToIndex(
79	index:&mut FileIndex,
80	file_path:PathBuf,
81	metadata:FileMetadata,
82	symbols:Vec<SymbolInfo>,
83) -> Result<()> {
84	// Check if file already exists and update accordingly
85	let is_new = !index.files.contains_key(&file_path);
86
87	// Add or update file metadata
88	index.files.insert(file_path.clone(), metadata.clone());
89
90	// Update symbol index
91	if is_new {
92		// Clear old symbols for this file if any
93		index.file_symbols.remove(&file_path);
94	}
95
96	// Add new symbols
97	index.file_symbols.insert(file_path.clone(), symbols.clone());
98
99	// Update symbol index for cross-referencing
100	for symbol in symbols {
101		index
102			.symbol_index
103			.entry(symbol.name.clone())
104			.or_insert_with(Vec::new)
105			.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
106	}
107
108	Ok(())
109}
110
111/// Remove a file from all indexes (content, symbols, files)
112pub fn RemoveFileFromIndex(index:&mut FileIndex, file_path:&PathBuf) -> Result<()> {
113	// Remove from files index
114	index.files.remove(file_path);
115
116	// Remove from file_symbols
117	index.file_symbols.remove(file_path);
118
119	// Remove from symbol index
120	for (_, locations) in index.symbol_index.iter_mut() {
121		locations.retain(|loc| loc.file_path != *file_path);
122	}
123
124	// Remove from content index
125	for (_, files) in index.content_index.iter_mut() {
126		files.retain(|p| p != file_path);
127	}
128
129	Ok(())
130}
131
132/// Remove multiple files from the index in a batch operation
133pub fn RemoveFilesFromIndex(index:&mut FileIndex, file_paths:&[PathBuf]) -> Result<()> {
134	for file_path in file_paths {
135		RemoveFileFromIndex(index, file_path)?;
136	}
137	Ok(())
138}
139
140/// Update index metadata (version, timestamp, checksum)
141pub fn UpdateIndexMetadata(index:&mut FileIndex) -> Result<()> {
142	use crate::Indexing::State::CreateState::{CalculateIndexChecksum, GenerateIndexVersion};
143
144	index.last_updated = chrono::Utc::now();
145	index.index_version = GenerateIndexVersion();
146	index.index_checksum = CalculateIndexChecksum(index)?;
147
148	Ok(())
149}
150
151/// Update file metadata for an existing file
152pub fn UpdateFileMetadata(index:&mut FileIndex, file_path:&PathBuf, metadata:FileMetadata) -> Result<()> {
153	if !index.files.contains_key(file_path) {
154		return Err(AirError::Internal(format!(
155			"Cannot update metadata for file not in index: {}",
156			file_path.display()
157		)));
158	}
159
160	index.files.insert(file_path.clone(), metadata);
161	Ok(())
162}
163
164/// Update symbols for a file
165pub fn UpdateFileSymbols(index:&mut FileIndex, file_path:&PathBuf, symbols:Vec<SymbolInfo>) -> Result<()> {
166	if !index.files.contains_key(file_path) {
167		return Err(AirError::Internal(format!(
168			"Cannot update symbols for file not in index: {}",
169			file_path.display()
170		)));
171	}
172
173	// Remove old symbols from symbol index
174	if let Some(old_symbols) = index.file_symbols.get(file_path) {
175		for old_symbol in old_symbols {
176			if let Some(locations) = index.symbol_index.get_mut(&old_symbol.name) {
177				locations.retain(|loc| loc.file_path != *file_path);
178			}
179		}
180	}
181
182	// Add new symbols
183	index.file_symbols.insert(file_path.clone(), symbols.clone());
184
185	for symbol in symbols {
186		index
187			.symbol_index
188			.entry(symbol.name.clone())
189			.or_insert_with(Vec::new)
190			.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
191	}
192
193	Ok(())
194}
195
196/// Update content index for a file
197pub fn UpdateContentIndex(index:&mut FileIndex, file_path:&PathBuf, tokens:Vec<String>) -> Result<()> {
198	// Remove file from existing content index entries
199	for (_, files) in index.content_index.iter_mut() {
200		files.retain(|p| p != file_path);
201	}
202
203	// Add new tokens
204	for token in tokens {
205		if token.len() > 2 {
206			// Only index tokens longer than 2 characters
207			index
208				.content_index
209				.entry(token)
210				.or_insert_with(Vec::new)
211				.push(file_path.clone());
212		}
213	}
214
215	Ok(())
216}
217
218/// Clean up orphaned entries (files with no matching content/symbols)
219pub fn CleanupOrphanedEntries(index:&mut FileIndex) -> Result<u32> {
220	let mut removed_count = 0;
221
222	let files_to_keep:Vec<_> = index.files.keys().cloned().collect();
223
224	// Clean up content index entries with no files
225	let orphaned_tokens:Vec<_> = index
226		.content_index
227		.iter()
228		.filter(|(_, files)| files.is_empty())
229		.map(|(token, _)| token.clone())
230		.collect();
231
232	for token in orphaned_tokens {
233		index.content_index.remove(&token);
234		removed_count += 1;
235	}
236
237	// Clean up symbol index entries with no locations
238	let orphaned_symbols:Vec<_> = index
239		.symbol_index
240		.iter()
241		.filter(|(_, locations)| locations.is_empty())
242		.map(|(symbol, _)| symbol.clone())
243		.collect();
244
245	for symbol in orphaned_symbols {
246		index.symbol_index.remove(&symbol);
247		removed_count += 1;
248	}
249
250	Ok(removed_count)
251}
252
253/// Merge another index into this one
254pub fn MergeIndexes(target:&mut FileIndex, source:FileIndex) -> Result<u32> {
255	let mut merged_files = 0;
256
257	// Merge files
258	for (path, metadata) in source.files {
259		if !target.files.contains_key(&path) {
260			target.files.insert(path.clone(), metadata);
261			merged_files += 1;
262		}
263	}
264
265	// Merge content index
266	for (token, mut files) in source.content_index {
267		target.content_index.entry(token).or_insert_with(Vec::new).append(&mut files);
268	}
269
270	// Merge symbol index
271	for (symbol, mut locations) in source.symbol_index {
272		target
273			.symbol_index
274			.entry(symbol)
275			.or_insert_with(Vec::new)
276			.append(&mut locations);
277	}
278
279	// Merge file symbols
280	for (path, symbols) in source.file_symbols {
281		if !target.file_symbols.contains_key(&path) {
282			target.file_symbols.insert(path, symbols);
283		}
284	}
285
286	// Update metadata
287	UpdateIndexMetadata(target)?;
288
289	Ok(merged_files)
290}
291
292/// Validate that index is in a consistent state
293pub fn ValidateIndexConsistency(index:&FileIndex) -> Result<()> {
294	// Check that all files in content_index exist in files
295	for (_, files) in &index.content_index {
296		for file_path in files {
297			if !index.files.contains_key(file_path) {
298				return Err(AirError::Internal(format!(
299					"Content index references non-existent file: {}",
300					file_path.display()
301				)));
302			}
303		}
304	}
305
306	// Check that all files in symbol_index exist in files
307	for (_, locations) in &index.symbol_index {
308		for location in locations {
309			if !index.files.contains_key(&location.file_path) {
310				return Err(AirError::Internal(format!(
311					"Symbol index references non-existent file: {}",
312					location.file_path.display()
313				)));
314			}
315		}
316	}
317
318	// Check that all files in file_symbols exist in files
319	for (file_path, _) in &index.file_symbols {
320		if !index.files.contains_key(file_path) {
321			return Err(AirError::Internal(format!(
322				"File symbols references non-existent file: {}",
323				file_path.display()
324			)));
325		}
326	}
327
328	Ok(())
329}
330
331/// Get index size estimate in bytes
332pub fn GetIndexSizeEstimate(index:&FileIndex) -> usize {
333	let mut size = 0;
334
335	// File metadata
336	for (path, metadata) in &index.files {
337		size += path.as_os_str().len();
338		size += std::mem::size_of::<FileMetadata>();
339	}
340
341	// Content index
342	for (token, files) in &index.content_index {
343		size += token.len();
344		size += files.len() * std::mem::size_of::<PathBuf>();
345	}
346
347	// Symbol index
348	for (symbol, locations) in &index.symbol_index {
349		size += symbol.len();
350		size += locations.len() * std::mem::size_of::<SymbolLocation>();
351	}
352
353	// File symbols
354	for (path, symbols) in &index.file_symbols {
355		size += path.as_os_str().len();
356		size += symbols.len() * std::mem::size_of::<SymbolInfo>();
357	}
358
359	size
360}
361
362/// Check if periodic update is needed based on age
363pub fn NeedsUpdate(index:&FileIndex, max_age_minutes:u64) -> bool {
364	let age_minutes = (chrono::Utc::now() - index.last_updated).num_minutes().abs() as u64;
365	age_minutes >= max_age_minutes
366}