Skip to main content

Mountain/ApplicationState/DTO/
DocumentStateDTO.rs

1//! # DocumentStateDTO
2//!
3//! # RESPONSIBILITY
4//! - Data transfer object for text document state
5//! - Serializable format for gRPC/IPC transmission
6//! - Used by Mountain to track document lifecycle and sync with Air
7//!
8//! # FIELDS
9//! - URI: Unique document resource identifier
10//! - LanguageIdentifier: Language ID for syntax highlighting
11//! - Version: Client-side version for change tracking
12//! - Lines: Document content split into lines
13//! - EOL: End-of-line sequence (\n or \r\n)
14//! - IsDirty: Indicates unsaved changes
15//! - Encoding: File encoding (e.g., utf8)
16//! - VersionIdentifier: Internal version for host tracking
17//!
18//! TODO (Mountain→Air Split): If Air implements a background document sync
19//! service, consider delegating delta change validation or conflict resolution
20//! to Air. For now, Mountain handles this synchronously to ensure UI
21//! responsiveness.
22
23use CommonLibrary::{Error::CommonError::CommonError, Utility::Serialization::URLSerializationHelper};
24use serde::{Deserialize, Serialize};
25use serde_json::Value;
26use url::Url;
27
28use crate::{
29	ApplicationState::Internal::TextProcessing::AnalyzeTextLinesAndEOL::Fn as AnalyzeTextLinesAndEOL,
30	dev_log,
31};
32use super::{RPCModelContentChangeDTO::RPCModelContentChangeDTO, RPCRangeDTO::RPCRangeDTO};
33
/// Maximum number of lines a document may contain. `DocumentStateDTO::Create`
/// rejects content that splits into more lines than this, to prevent memory
/// exhaustion.
const MAX_DOCUMENT_LINES:usize = 1_000_000;

/// Maximum length of a single line, measured in bytes (`String::len`), to
/// prevent line-based denial of service. Checked per line in
/// `DocumentStateDTO::Create`.
const MAX_LINE_LENGTH:usize = 100_000;

/// Maximum language identifier length, measured in bytes (`String::len`).
const MAX_LANGUAGE_ID_LENGTH:usize = 128;
42
43/// Represents the complete in-memory state of a single text document.
44#[derive(Serialize, Deserialize, Clone, Debug)]
45#[serde(rename_all = "camelCase")]
46pub struct DocumentStateDTO {
47	/// The unique resource identifier for this document.
48	#[serde(rename = "uri", with = "URLSerializationHelper")]
49	pub URI:Url,
50
51	/// The VS Code language identifier (e.g., "rust", "typescript").
52	#[serde(skip_serializing_if = "String::is_empty")]
53	pub LanguageIdentifier:String,
54
55	/// The version number, incremented on each change from the client.
56	pub Version:i64,
57
58	/// The content of the document, split into lines.
59	pub Lines:Vec<String>,
60
61	/// The detected end-of-line sequence (e.g., `\n` or `\r\n`).
62	#[serde(rename = "eol")]
63	pub EOL:String,
64
65	/// A flag indicating if the in-memory version has unsaved changes.
66	pub IsDirty:bool,
67
68	/// The detected file encoding (e.g., "utf8").
69	pub Encoding:String,
70
71	/// An internal version number, used for tracking changes within the host.
72	pub VersionIdentifier:i64,
73}
74
75impl DocumentStateDTO {
76	/// Creates a new `DocumentStateDTO` from its initial content with
77	/// validation.
78	///
79	/// # Arguments
80	/// * `URI` - The document resource URI
81	/// * `LanguageIdentifier` - Optional language ID for syntax highlighting
82	/// * `Content` - The initial document content
83	///
84	/// # Returns
85	/// Result containing the DTO or an error if validation fails
86	///
87	/// # Errors
88	/// Returns `CommonError` if:
89	/// - Language identifier exceeds maximum length
90	/// - Document exceeds maximum line count
91	/// - Any line exceeds maximum length
92	/// - URI is empty
93	pub fn Create(URI:Url, LanguageIdentifier:Option<String>, Content:String) -> Result<Self, CommonError> {
94		// Validate URI is not empty
95		if URI.as_str().is_empty() {
96			return Err(CommonError::InvalidArgument {
97				ArgumentName:"URI".into(),
98				Reason:"URI cannot be empty".into(),
99			});
100		}
101
102		let LanguageID = LanguageIdentifier.unwrap_or_else(|| "plaintext".to_string());
103
104		// Validate language identifier length
105		if LanguageID.len() > MAX_LANGUAGE_ID_LENGTH {
106			return Err(CommonError::InvalidArgument {
107				ArgumentName:"LanguageIdentifier".into(),
108				Reason:format!("Language identifier exceeds maximum length of {} bytes", MAX_LANGUAGE_ID_LENGTH),
109			});
110		}
111
112		let (Lines, EOL) = AnalyzeTextLinesAndEOL(&Content);
113
114		// Validate document line count
115		if Lines.len() > MAX_DOCUMENT_LINES {
116			return Err(CommonError::InvalidArgument {
117				ArgumentName:"Content".into(),
118				Reason:format!("Document exceeds maximum line count of {}", MAX_DOCUMENT_LINES),
119			});
120		}
121
122		// Validate individual line lengths
123		for (Index, Line) in Lines.iter().enumerate() {
124			if Line.len() > MAX_LINE_LENGTH {
125				return Err(CommonError::InvalidArgument {
126					ArgumentName:"Content".into(),
127					Reason:format!("Line {} exceeds maximum length of {} bytes", Index + 1, MAX_LINE_LENGTH),
128				});
129			}
130		}
131
132		let Encoding = "utf8".to_string();
133
134		Ok(Self {
135			URI,
136
137			LanguageIdentifier:LanguageID,
138
139			Version:1,
140
141			Lines,
142
143			EOL,
144
145			IsDirty:false,
146
147			Encoding,
148
149			VersionIdentifier:1,
150		})
151	}
152
153	/// Creates a new `DocumentStateDTO` without validation for internal use.
154	/// This should only be called with trusted data sources.
155	pub fn CreateUnsafe(
156		URI:Url,
157
158		LanguageIdentifier:String,
159
160		Lines:Vec<String>,
161
162		EOL:String,
163
164		IsDirty:bool,
165
166		Encoding:String,
167
168		Version:i64,
169
170		VersionIdentifier:i64,
171	) -> Self {
172		Self {
173			URI,
174
175			LanguageIdentifier,
176
177			Version,
178
179			Lines,
180
181			EOL,
182
183			IsDirty,
184
185			Encoding,
186
187			VersionIdentifier,
188		}
189	}
190
191	/// Reconstructs the full text content of the document from its lines.
192	pub fn GetText(&self) -> String { self.Lines.join(&self.EOL) }
193
194	/// Converts the struct to a `serde_json::Value`, useful for notifications.
195	pub fn ToDTO(&self) -> Result<Value, CommonError> {
196		serde_json::to_value(self).map_err(|Error| CommonError::SerializationError { Description:Error.to_string() })
197	}
198
199	/// Applies a set of changes to the document. This can be a full text
200	/// replacement or a collection of delta changes.
201	pub fn ApplyChanges(&mut self, NewVersion:i64, ChangesValue:&Value) -> Result<(), CommonError> {
202		// Ignore stale changes.
203		if NewVersion <= self.Version {
204			return Ok(());
205		}
206
207		// Attempt to deserialize as an array of delta changes first.
208		if let Ok(RPCChange) = serde_json::from_value::<Vec<RPCModelContentChangeDTO>>(ChangesValue.clone()) {
209			dev_log!("model", "applying {} delta change(s) to document {}", RPCChange.len(), self.URI);
210
211			self.Lines = ApplyDeltaChanges(&self.Lines, &self.EOL, &RPCChange);
212		} else if let Some(FullText) = ChangesValue.as_str() {
213			// If it's not deltas, check if it's a full text replacement.
214			let (NewLines, NewEOL) = AnalyzeTextLinesAndEOL(FullText);
215
216			self.Lines = NewLines;
217
218			self.EOL = NewEOL;
219		} else {
220			return Err(CommonError::InvalidArgument {
221				ArgumentName:"ChangesValue".into(),
222
223				Reason:format!(
224					"Invalid change format for {}: expected string or RPCModelContentChangeDTO array.",
225					self.URI
226				),
227			});
228		}
229
230		// Update metadata after changes have been applied.
231		self.Version = NewVersion;
232
233		self.VersionIdentifier += 1;
234
235		self.IsDirty = true;
236
237		Ok(())
238	}
239}
240
241/// Applies delta changes to the document text and returns the updated lines.
242///
243/// This function:
244/// 1. Sorts changes in reverse order (by start position) to prevent offset
245///    corruption
246/// 2. Converts line/column positions to byte offsets in the full text
247/// 3. Applies each change (delete range + insert new text)
248/// 4. Splits the result back into lines
249///
250/// # Arguments
251/// * `Lines` - The current document lines
252/// * `EOL` - The end-of-line sequence to use
253/// * `RPCChange` - Array of changes to apply
254///
255/// # Returns
256/// Updated lines vector after applying all changes
257fn ApplyDeltaChanges(Lines:&[String], EOL:&str, RPCChange:&[RPCModelContentChangeDTO]) -> Vec<String> {
258	// Join lines into full text for offset-based manipulation
259	let mut ResultText = Lines.join(EOL);
260
261	// If no changes, return original lines
262	if RPCChange.is_empty() {
263		return Lines.to_vec();
264	}
265
266	// Sort changes in reverse order of position to prevent offset corruption
267	// When applying multiple changes, earlier changes shift positions for later
268	// changes. By applying from end to beginning, all offsets remain valid.
269	let mut SortedChanges:Vec<&RPCModelContentChangeDTO> = RPCChange.iter().collect();
270
271	SortedChanges.sort_by(|a, b| CMP_Range_Position(&b.Range, &a.Range));
272
273	// Apply each change to the text
274	for Change in SortedChanges {
275		// Convert (line, column) to byte offset
276		let StartOffset = PositionToOffset(&ResultText, EOL, &Change.Range.StartLineNumber, &Change.Range.StartColumn);
277
278		let EndOffset = PositionToOffset(&ResultText, EOL, &Change.Range.EndLineNumber, &Change.Range.EndColumn);
279
280		// Validate offsets
281		if StartOffset > EndOffset {
282			dev_log!(
283				"model",
284				"error: invalid range: start ({}) > end ({}) for text length {}",
285				StartOffset,
286				EndOffset,
287				ResultText.len()
288			);
289
290			continue;
291		}
292
293		let TextLength = ResultText.len();
294
295		if StartOffset > TextLength || EndOffset > TextLength {
296			dev_log!(
297				"model",
298				"error: out of bounds: start ({}) or end ({}) exceeds text length {}",
299				StartOffset,
300				EndOffset,
301				TextLength
302			);
303
304			continue;
305		}
306
307		// Remove old text and insert new text
308		// Safe slice operation: validated offsets above
309		let OldText = ResultText.as_bytes();
310
311		ResultText =
312			String::from_utf8_lossy(&[&OldText[..StartOffset], Change.Text.as_bytes(), &OldText[EndOffset..]].concat())
313				.into_owned();
314	}
315
316	// Re-split the result into lines
317	AnalyzeTextLinesAndEOL(&ResultText).0
318}
319
/// Translates a (line, column) position into a byte offset within `Text`.
///
/// `Text` is split on `EOL`, and both `LineNumber` and `Column` are treated as
/// 0-based. `Column` counts characters (Unicode scalar values), not bytes; a
/// column past the end of the line resolves to the end of that line.
///
/// When `LineNumber` is at or beyond the number of lines, the result is the
/// combined byte length of every line plus one `EOL` per line, which lies past
/// the end of `Text` whenever `EOL` is non-empty.
///
/// NOTE(review): VS Code/Monaco ranges are usually 1-based with UTF-16
/// columns — confirm that callers normalize positions before calling this.
fn PositionToOffset(Text:&str, EOL:&str, LineNumber:&usize, Column:&usize) -> usize {
	let TerminatorLength = EOL.len();

	// Bytes occupied by the lines (and their terminators) walked so far.
	let mut BytesBefore = 0;

	for (Index, Segment) in Text.split(EOL).enumerate() {
		if Index == *LineNumber {
			// Summing the UTF-8 width of the first `Column` characters gives
			// the byte offset inside the line, capped at the line's length.
			let BytesIntoLine:usize = Segment.chars().take(*Column).map(char::len_utf8).sum();

			return BytesBefore + BytesIntoLine;
		}

		BytesBefore += Segment.len() + TerminatorLength;
	}

	// Target line does not exist: every line has been counted.
	BytesBefore
}
353
354/// Compares two RPC ranges to determine their order in the document.
355/// Returns negative if a comes before b, zero if equal, positive if a comes
356/// after b.
357fn CMP_Range_Position(A:&RPCRangeDTO, B:&RPCRangeDTO) -> std::cmp::Ordering {
358	A.StartLineNumber
359		.cmp(&B.StartLineNumber)
360		.then_with(|| A.StartColumn.cmp(&B.StartColumn))
361}