use std::{iter::Peekable, ops::Range, sync::LazyLock};

use lapce_xi_rope::{DeltaBuilder, Rope, RopeDelta};
use memchr::{memchr, memchr2};

static CR_LF: LazyLock<Rope> = LazyLock::new(|| Rope::from("\r\n"));
static LF: LazyLock<Rope> = LazyLock::new(|| Rope::from("\n"));

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LineEnding {
    /// Windows-style `\r\n`
    CrLf,
    /// Unix-style `\n`
    Lf,
}
impl LineEnding {
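    /// Replace every line ending in `text` with this [`LineEnding`].
    ///
    /// A minimal usage sketch (marked `ignore` because the import path for
    /// `LineEnding` depends on how the enclosing crate exposes this module):
    ///
    /// ```ignore
    /// use lapce_xi_rope::Rope;
    ///
    /// let text = Rope::from("a\r\nb");
    /// // Convert the CRLF ending to a bare LF.
    /// assert_eq!(LineEnding::Lf.normalize(&text).slice_to_cow(..), "a\nb");
    /// ```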
    pub fn normalize(self, text: &Rope) -> Rope {
        self.normalize_delta(text)
            .map(|d| d.apply(text))
            .unwrap_or_else(|| text.clone())
    }

    /// Build a [`RopeDelta`] that rewrites the line endings in `text` to `self`.
    ///
    /// Returns `None` if the text contains no line endings at all. Note that the
    /// returned delta may be empty when every line ending already matches `self`.
    pub fn normalize_delta(self, text: &Rope) -> Option<RopeDelta> {
        let mut builder = DeltaBuilder::new(text.len());

        let le = if self == LineEnding::Lf {
            LF.clone()
        } else {
            CR_LF.clone()
        };

        let mut had_entries = false;
        for (range, kind) in FullLeChunkSearch::new(text.iter_chunks(..)) {
            had_entries = true;
            match kind {
                LeChunkKind::CrLf => {
                    if self == LineEnding::Lf {
                        builder.replace(range, LF.clone());
                    }
                }
                LeChunkKind::Lf => {
                    if self == LineEnding::CrLf {
                        builder.replace(range, CR_LF.clone());
                    }
                }
                LeChunkKind::Cr => {
                    // A lone `\r` is always rewritten to the target line ending.
                    builder.replace(range, le.clone());
                }
            }
        }

        if had_entries {
            let delta = builder.build();
            Some(delta)
        } else {
            None
        }
    }

    /// Like [`LineEnding::normalize`], but only rewrites lone `\r` characters,
    /// leaving existing `\n` and `\r\n` endings untouched.
    pub fn normalize_limited(self, text: &Rope) -> Rope {
        let mut builder = DeltaBuilder::new(text.len());

        let le = if self == LineEnding::Lf {
            LF.clone()
        } else {
            CR_LF.clone()
        };

        let mut had_entries = false;
        for offset in LoneCrChunkSearch::new(text.iter_chunks(..)) {
            had_entries = true;
            builder.replace(offset..offset + 1, le.clone());
        }

        if had_entries {
            let delta = builder.build();
            delta.apply(text)
        } else {
            text.clone()
        }
    }

    /// The literal characters of this line ending.
    pub fn get_chars(&self) -> &'static str {
        match self {
            LineEnding::CrLf => "\r\n",
            LineEnding::Lf => "\n",
        }
    }

    /// The human-readable name of this line ending (`"CRLF"` or `"LF"`).
    pub fn as_str(&self) -> &'static str {
        match self {
            LineEnding::CrLf => "CRLF",
            LineEnding::Lf => "LF",
        }
    }
}

/// The line ending detected in a text: a single consistent ending, a mix of
/// endings, or unknown when no line ending was found.
#[derive(Debug, Clone, Copy)]
pub enum LineEndingDetermination {
    CrLf,
    Lf,
    Mixed,
    Unknown,
}
impl LineEndingDetermination {
    /// Determine the line ending of `text` by looking at the first line ending
    /// in each chunk of the rope. Returns `Mixed` when chunks disagree and
    /// `Unknown` when no line ending is found.
    pub fn determine(text: &Rope) -> Self {
        let mut crlf = false;
        let mut lf = false;

        for chunk in text.iter_chunks(..) {
            match LineEndingDetermination::determine_str(chunk) {
                LineEndingDetermination::CrLf => crlf = true,
                LineEndingDetermination::Lf => lf = true,
                LineEndingDetermination::Mixed => {
                    return LineEndingDetermination::Mixed;
                }
                LineEndingDetermination::Unknown => {}
            }
        }

        match (crlf, lf) {
            (true, true) => LineEndingDetermination::Mixed,
            (true, false) => LineEndingDetermination::CrLf,
            (false, true) => LineEndingDetermination::Lf,
            (false, false) => LineEndingDetermination::Unknown,
        }
    }

    /// Classify a single chunk by its first `\n` or `\r` byte. A `\r` that is
    /// not immediately followed by `\n` within the same chunk (including one
    /// split from its `\n` by a chunk boundary) is reported as `Mixed`.
    fn determine_str(chunk: &str) -> LineEndingDetermination {
        let bytes = chunk.as_bytes();
        let newline = memchr2(b'\n', b'\r', bytes);
        match newline {
            Some(x) if bytes[x] == b'\r' && bytes.len() > x + 1 && bytes[x + 1] == b'\n' => {
                LineEndingDetermination::CrLf
            }
            Some(x) if bytes[x] == b'\n' => LineEndingDetermination::Lf,
            Some(_) => LineEndingDetermination::Mixed,
            None => LineEndingDetermination::Unknown,
        }
    }

    /// Resolve to a concrete [`LineEnding`], falling back to `le` for the
    /// `Mixed` and `Unknown` cases.
    pub fn unwrap_or(self, le: LineEnding) -> LineEnding {
        match self {
            LineEndingDetermination::CrLf => LineEnding::CrLf,
            LineEndingDetermination::Lf => LineEnding::Lf,
            LineEndingDetermination::Mixed | LineEndingDetermination::Unknown => le,
        }
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum LeChunkKind {
    CrLf,
    Lf,
    Cr,
}

/// Iterator over every line ending (`\r\n`, `\n`, and lone `\r`) in a sequence
/// of text chunks, yielding the byte range and kind of each ending.
struct FullLeChunkSearch<'a, I: Iterator<Item = &'a str>> {
    /// Byte offset of the start of the current chunk within the whole text.
    offset: usize,
    /// Byte position within the current chunk.
    chunk_pos: usize,
    chunks: Peekable<I>,
}
impl<'a, I: Iterator<Item = &'a str>> FullLeChunkSearch<'a, I> {
    fn new(chunks: I) -> Self {
        Self {
            offset: 0,
            chunk_pos: 0,
            chunks: chunks.peekable(),
        }
    }

    /// Get the chunk the cursor is in, advancing to the next chunk if the
    /// position has reached the end of the current one.
    fn get_chunk(&mut self) -> Option<&'a str> {
        let chunk = *self.chunks.peek()?;
        if self.chunk_pos >= chunk.len() {
            self.advance_chunk();
            self.chunks.peek().copied()
        } else {
            Some(chunk)
        }
    }

    fn advance_chunk(&mut self) -> Option<()> {
        let chunk = self.chunks.next()?;
        self.offset += chunk.len();
        self.chunk_pos = 0;

        Some(())
    }
}
impl<'a, I: Iterator<Item = &'a str>> Iterator for FullLeChunkSearch<'a, I> {
    type Item = (Range<usize>, LeChunkKind);

    fn next(&mut self) -> Option<Self::Item> {
        let chunk = self.get_chunk()?;

        let bytes = &chunk.as_bytes()[self.chunk_pos..];

        let newline = memchr2(b'\n', b'\r', bytes);
        match newline {
            // `\r\n` contained entirely within this chunk
            Some(x) if bytes[x] == b'\r' && bytes.len() > x + 1 && bytes[x + 1] == b'\n' => {
                let start = self.offset + self.chunk_pos + x;
                let end = start + 2;

                self.chunk_pos += x + 2;
                Some((start..end, LeChunkKind::CrLf))
            }
            // A bare `\n`
            Some(x) if bytes[x] == b'\n' => {
                let start = self.offset + self.chunk_pos + x;
                let end = start + 1;

                self.chunk_pos += x + 1;
                Some((start..end, LeChunkKind::Lf))
            }
            // A `\r` that is either at the end of the chunk or not followed by `\n`
            Some(x) => {
                assert_eq!(bytes[x], b'\r');

                let start = self.offset + self.chunk_pos + x;
                self.chunk_pos += x + 1;

                // If the `\r` ends this chunk, peek into the next chunk to see
                // whether it starts with `\n`, forming a `\r\n` that spans the
                // chunk boundary.
                let v = if self.chunk_pos == chunk.len() {
                    if let Some(next_chunk) = self.get_chunk() {
                        let next_chunk = &next_chunk.as_bytes()[self.chunk_pos..];
                        if next_chunk.starts_with(b"\n") {
                            self.chunk_pos += 1;
                            Some((start..start + 2, LeChunkKind::CrLf))
                        } else {
                            None
                        }
                    } else {
                        None
                    }
                } else {
                    None
                };

                Some(v.unwrap_or_else(|| {
                    // Otherwise it is a lone `\r`
                    let end = start + 1;
                    (start..end, LeChunkKind::Cr)
                }))
            }
            None => {
                // No line endings in the rest of this chunk; move to the next one
                self.advance_chunk();
                self.next()
            }
        }
    }
}

/// Iterator over lone `\r` characters (those not part of a `\r\n` pair) in a
/// sequence of text chunks, yielding the byte offset of each one.
struct LoneCrChunkSearch<'a, I: Iterator<Item = &'a str>> {
    /// Byte offset of the start of the current chunk within the whole text.
    offset: usize,
    /// Byte position within the current chunk.
    chunk_pos: usize,
    chunks: Peekable<I>,
}

impl<'a, I: Iterator<Item = &'a str>> LoneCrChunkSearch<'a, I> {
    fn new(chunks: I) -> Self {
        Self {
            offset: 0,
            chunk_pos: 0,
            chunks: chunks.peekable(),
        }
    }

    /// Get the chunk the cursor is in, advancing to the next chunk if the
    /// position has reached the end of the current one.
    fn get_chunk(&mut self) -> Option<&'a str> {
        let chunk = *self.chunks.peek()?;
        if self.chunk_pos >= chunk.len() {
            self.advance_chunk();
            self.chunks.peek().copied()
        } else {
            Some(chunk)
        }
    }

    fn advance_chunk(&mut self) -> Option<()> {
        let chunk = self.chunks.next()?;
        self.offset += chunk.len();
        self.chunk_pos = 0;

        Some(())
    }
}

impl<'a, I: Iterator<Item = &'a str>> Iterator for LoneCrChunkSearch<'a, I> {
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let chunk = self.get_chunk()?;

            let bytes = &chunk.as_bytes()[self.chunk_pos..];

            let newline = memchr(b'\r', bytes);
            match newline {
                Some(x) => {
                    let offset = self.offset + self.chunk_pos + x;

                    self.chunk_pos += x + 1;
                    if self.chunk_pos < chunk.len() && chunk.as_bytes()[self.chunk_pos] == b'\n' {
                        // Part of a `\r\n` within this chunk; skip it.
                        self.chunk_pos += 1;
                    } else if let Some(chunk_b) = self.get_chunk() {
                        let chunk_b = &chunk_b.as_bytes()[self.chunk_pos..];
                        if chunk_b.starts_with(b"\n") {
                            // `\r\n` split across a chunk boundary; skip it.
                            self.chunk_pos += 1;
                        } else {
                            return Some(offset);
                        }
                    } else {
                        // `\r` at the very end of the text.
                        return Some(offset);
                    }
                }
                None => {
                    // No `\r` in the rest of this chunk; move to the next one.
                    self.advance_chunk();
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn normalize() {
        let text = Rope::from("hello\r\nworld toast and jam\nthe end\nhi");
        let normalized = LineEnding::CrLf.normalize(&text);
        assert_eq!(
            normalized.slice_to_cow(..),
            "hello\r\nworld toast and jam\r\nthe end\r\nhi"
        );

        let text = Rope::from("\n");
        let normalized = LineEnding::Lf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\n");
        let normalized = LineEnding::CrLf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\r\n");

        let text = Rope::from("\r\n");
        let normalized = LineEnding::Lf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\n");
        let normalized = LineEnding::CrLf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\r\n");

        // Lone `\r` characters are rewritten by both `normalize` and
        // `normalize_limited`.
        let text = Rope::from("\r");
        let normalized = LineEnding::Lf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\n");
        let normalized = LineEnding::CrLf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\r\n");
        let normalized = LineEnding::Lf.normalize_limited(&text);
        assert_eq!(normalized.slice_to_cow(..), "\n");

        let text = Rope::from("\rtest");
        let normalized = LineEnding::Lf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\ntest");
        let normalized = LineEnding::CrLf.normalize(&text);
        assert_eq!(normalized.slice_to_cow(..), "\r\ntest");
        let normalized = LineEnding::Lf.normalize_limited(&text);
        assert_eq!(normalized.slice_to_cow(..), "\ntest");
    }
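
    // Illustrative sketch, not part of the original test suite: `normalize_delta`
    // exposes the edits as a `RopeDelta`, and yields `None` when the text contains
    // no line endings at all.
    #[test]
    fn normalize_delta_sketch() {
        let text = Rope::from("a\r\nb\rc");
        let delta = LineEnding::Lf
            .normalize_delta(&text)
            .expect("the text contains line endings, so a delta is produced");
        assert_eq!(delta.apply(&text).slice_to_cow(..), "a\nb\nc");

        // No line endings at all: nothing to normalize.
        assert!(LineEnding::Lf.normalize_delta(&Rope::from("plain")).is_none());
    }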

    #[test]
    fn chunk_search() {
        let text = Rope::from("hello\r\nworld toast and jam\nthe end\nhi");
        let c = FullLeChunkSearch::new(text.iter_chunks(..));
        assert_eq!(
            c.collect::<Vec<_>>(),
            vec![
                (5..7, LeChunkKind::CrLf),
                (26..27, LeChunkKind::Lf),
                (34..35, LeChunkKind::Lf),
            ]
        );
        let c = LoneCrChunkSearch::new(text.iter_chunks(..));
        assert_eq!(c.collect::<Vec<_>>(), Vec::new());

        // Line endings that span chunk boundaries: the `\r` ending "\r\ne\r" pairs
        // with the `\n` starting "\ntest\r" to form one CRLF, while the `\r` ending
        // "\ntest\r" and the `\r` in "\rv" are lone CRs.
        let text = ["a\n", "\n5", "\r\ne\r", "\ntest\r", "\rv"];
        let multi_chunk = FullLeChunkSearch::new(text.into_iter());
        assert_eq!(
            multi_chunk.collect::<Vec<_>>(),
            vec![
                (1..2, LeChunkKind::Lf),
                (2..3, LeChunkKind::Lf),
                (4..6, LeChunkKind::CrLf),
                (7..9, LeChunkKind::CrLf),
                (13..14, LeChunkKind::Cr),
                (14..15, LeChunkKind::Cr),
            ]
        );

        let multi_chunk = LoneCrChunkSearch::new(text.into_iter());
        assert_eq!(multi_chunk.collect::<Vec<_>>(), vec![13, 14]);

        let text = ["\n\rb"];
        let chunks = FullLeChunkSearch::new(text.into_iter());
        assert_eq!(
            chunks.collect::<Vec<_>>(),
            vec![(0..1, LeChunkKind::Lf), (1..2, LeChunkKind::Cr)]
        );

        let text = ["\n\rb"];
        let chunks = LoneCrChunkSearch::new(text.into_iter());
        assert_eq!(chunks.collect::<Vec<_>>(), vec![1]);
    }
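
    // Illustrative sketch, not part of the original test suite: how the
    // `LineEndingDetermination` API is expected to behave on small inputs,
    // given the first-line-ending-per-chunk heuristic in `determine_str`.
    #[test]
    fn determination_sketch() {
        let det = LineEndingDetermination::determine(&Rope::from("a\r\nb\r\nc"));
        assert!(matches!(det, LineEndingDetermination::CrLf));
        assert_eq!(det.unwrap_or(LineEnding::Lf), LineEnding::CrLf);

        // With no line endings, the determination is `Unknown` and callers fall
        // back to the default they pass to `unwrap_or`.
        let det = LineEndingDetermination::determine(&Rope::from("plain"));
        assert!(matches!(det, LineEndingDetermination::Unknown));
        assert_eq!(det.unwrap_or(LineEnding::Lf), LineEnding::Lf);
    }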
}