floem_editor_core/
word.rs

1use lapce_xi_rope::{Cursor, Rope, RopeInfo};
2
3use crate::{
4    mode::Mode,
5    util::{matching_char, matching_pair_direction},
6};
7
/// Character classes used to decide where word boundaries fall.
///
/// See [`get_char_property`] for how a `char` is mapped to one of these classes.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CharClassification {
    /// Carriage return (`\r`)
    Cr,
    /// Line feed (`\n`)
    Lf,
    /// Space, or any other character at or below U+0020 that is neither `\r` nor `\n`
    Space,
    /// ASCII punctuation character (note that `_` is not part of this class)
    Punctuation,
    /// Everything else: letters, digits, `_` and all of non-ASCII Unicode
    Other,
}
22
/// Kind of boundary found between two adjacent characters.
///
/// A boundary can be the start of a word, its end, or both at once
/// (for instance between a letter and a punctuation character).
#[derive(Debug, PartialEq, Eq)]
enum WordBoundary {
    /// Denote that this is not a boundary
    Interior,
    /// A boundary indicating the start of a word
    Start,
    /// A boundary indicating the end of a word
    End,
    /// Both start and end boundaries (ex: punctuation characters)
    Both,
}

impl WordBoundary {
    /// Returns `true` when a word starts here (`Start` or `Both`).
    fn is_start(&self) -> bool {
        matches!(self, WordBoundary::Start | WordBoundary::Both)
    }

    /// Returns `true` when a word ends here (`End` or `Both`).
    fn is_end(&self) -> bool {
        matches!(self, WordBoundary::End | WordBoundary::Both)
    }

    /// Returns `true` for every variant except `Interior`.
    // Not called anywhere in this file yet, hence the lint allowance.
    #[allow(unused)]
    fn is_boundary(&self) -> bool {
        !matches!(self, WordBoundary::Interior)
    }
}
50
51/// A cursor providing utility function to navigate the rope
52/// by word boundaries.
53/// Boundaries can be the start of a word, its end, punctuation etc.
54pub struct WordCursor<'a> {
55    pub(crate) inner: Cursor<'a, RopeInfo>,
56}
57
58impl<'a> WordCursor<'a> {
59    pub fn new(text: &'a Rope, pos: usize) -> WordCursor<'a> {
60        let inner = Cursor::new(text, pos);
61        WordCursor { inner }
62    }
63
64    /// Get the previous start boundary of a word, and set the cursor position to the boundary found.
65    /// The behaviour diffs a bit on new line character with modal and non modal,
66    /// while on modal, it will ignore the new line character and on non-modal,
67    /// it will stop at the new line character
68    /// **Example:**
69    ///
70    /// ```rust
71    /// # use floem_editor_core::word::WordCursor;
72    /// # use floem_editor_core::mode::Mode;
73    /// # use lapce_xi_rope::Rope;
74    /// let rope = Rope::from("Hello world");
75    /// let mut cursor = WordCursor::new(&rope, 4);
76    /// let boundary = cursor.prev_boundary(Mode::Insert);
77    /// assert_eq!(boundary, Some(0));
78    ///```
79    pub fn prev_boundary(&mut self, mode: Mode) -> Option<usize> {
80        if let Some(ch) = self.inner.prev_codepoint() {
81            let mut prop = get_char_property(ch);
82            let mut candidate = self.inner.pos();
83            while let Some(prev) = self.inner.prev_codepoint() {
84                let prop_prev = get_char_property(prev);
85                if classify_boundary(prop_prev, prop).is_start() {
86                    break;
87                }
88
89                // Stop if line beginning reached, without any non-whitespace characters
90                if mode == Mode::Insert
91                    && prop_prev == CharClassification::Lf
92                    && prop == CharClassification::Space
93                {
94                    break;
95                }
96
97                prop = prop_prev;
98                candidate = self.inner.pos();
99            }
100            self.inner.set(candidate);
101            return Some(candidate);
102        }
103        None
104    }
105
106    /// Computes where the cursor position should be after backward deletion.
107    ///
108    /// **Example:**
109    ///
110    /// ```rust
111    /// # use floem_editor_core::word::WordCursor;
112    /// # use lapce_xi_rope::Rope;
113    /// let text = "violet are blue";
114    /// let rope = Rope::from(text);
115    /// let mut cursor = WordCursor::new(&rope, 9);
116    /// let position = cursor.prev_deletion_boundary();
117    /// let position = position;
118    ///
119    /// assert_eq!(position, Some(7));
120    /// assert_eq!(&text[..position.unwrap()], "violet ");
121    ///```
122    pub fn prev_deletion_boundary(&mut self) -> Option<usize> {
123        if let Some(ch) = self.inner.prev_codepoint() {
124            let mut prop = get_char_property(ch);
125            let mut candidate = self.inner.pos();
126
127            // Flag, determines if the word should be deleted or not
128            // If not, erase only whitespace characters.
129            let mut keep_word = false;
130            while let Some(prev) = self.inner.prev_codepoint() {
131                let prop_prev = get_char_property(prev);
132
133                // Stop if line beginning reached, without any non-whitespace characters
134                if prop_prev == CharClassification::Lf && prop == CharClassification::Space {
135                    break;
136                }
137
138                // More than a single whitespace: keep word, remove only whitespaces
139                if prop == CharClassification::Space && prop_prev == CharClassification::Space {
140                    keep_word = true;
141                }
142
143                // Line break found: keep words, delete line break & trailing whitespaces
144                if prop == CharClassification::Lf || prop == CharClassification::Cr {
145                    keep_word = true;
146                }
147
148                // Skip word deletion if above conditions were met
149                if keep_word
150                    && (prop_prev == CharClassification::Punctuation
151                        || prop_prev == CharClassification::Other)
152                {
153                    break;
154                }
155
156                // Default deletion
157                if classify_boundary(prop_prev, prop).is_start() {
158                    break;
159                }
160                prop = prop_prev;
161                candidate = self.inner.pos();
162            }
163            self.inner.set(candidate);
164            return Some(candidate);
165        }
166        None
167    }
168
169    /// Get the position of the next non blank character in the rope
170    ///
171    /// **Example:**
172    ///
173    /// ```rust
174    /// # use floem_editor_core::word::WordCursor;
175    /// # use lapce_xi_rope::Rope;
176    /// let rope = Rope::from("    world");
177    /// let mut cursor = WordCursor::new(&rope, 0);
178    /// let char_position = cursor.next_non_blank_char();
179    /// assert_eq!(char_position, 4);
180    ///```
181    pub fn next_non_blank_char(&mut self) -> usize {
182        let mut candidate = self.inner.pos();
183        while let Some(next) = self.inner.next_codepoint() {
184            let prop = get_char_property(next);
185            if prop != CharClassification::Space {
186                break;
187            }
188            candidate = self.inner.pos();
189        }
190        self.inner.set(candidate);
191        candidate
192    }
193
194    /// Get the next start boundary of a word, and set the cursor position to the boundary found.
195    /// **Example:**
196    ///
197    /// ```rust
198    /// # use floem_editor_core::word::WordCursor;
199    /// # use lapce_xi_rope::Rope;
200    /// let rope = Rope::from("Hello world");
201    /// let mut cursor = WordCursor::new(&rope, 0);
202    /// let boundary = cursor.next_boundary();
203    /// assert_eq!(boundary, Some(6));
204    ///```
205    pub fn next_boundary(&mut self) -> Option<usize> {
206        if let Some(ch) = self.inner.next_codepoint() {
207            let mut prop = get_char_property(ch);
208            let mut candidate = self.inner.pos();
209            while let Some(next) = self.inner.next_codepoint() {
210                let prop_next = get_char_property(next);
211                if classify_boundary(prop, prop_next).is_start() {
212                    break;
213                }
214                prop = prop_next;
215                candidate = self.inner.pos();
216            }
217            self.inner.set(candidate);
218            return Some(candidate);
219        }
220        None
221    }
222
223    /// Get the next end boundary, and set the cursor position to the boundary found.
224    /// **Example:**
225    ///
226    /// ```rust
227    /// # use floem_editor_core::word::WordCursor;
228    /// # use lapce_xi_rope::Rope;
229    /// let rope = Rope::from("Hello world");
230    /// let mut cursor = WordCursor::new(&rope, 3);
231    /// let end_boundary = cursor.end_boundary();
232    /// assert_eq!(end_boundary, Some(5));
233    ///```
234    pub fn end_boundary(&mut self) -> Option<usize> {
235        self.inner.next_codepoint();
236        if let Some(ch) = self.inner.next_codepoint() {
237            let mut prop = get_char_property(ch);
238            let mut candidate = self.inner.pos();
239            while let Some(next) = self.inner.next_codepoint() {
240                let prop_next = get_char_property(next);
241                if classify_boundary(prop, prop_next).is_end() {
242                    break;
243                }
244                prop = prop_next;
245                candidate = self.inner.pos();
246            }
247            self.inner.set(candidate);
248            return Some(candidate);
249        }
250        None
251    }
252
253    /// Get the first matching [`CharClassification::Other`] backward and set the cursor position to this location .
254    /// **Example:**
255    ///
256    /// ```rust
257    /// # use floem_editor_core::word::WordCursor;
258    /// # use lapce_xi_rope::Rope;
259    /// let text = "violet, are\n blue";
260    /// let rope = Rope::from(text);
261    /// let mut cursor = WordCursor::new(&rope, 11);
262    /// let position = cursor.prev_code_boundary();
263    /// assert_eq!(&text[position..], "are\n blue");
264    ///```
265    pub fn prev_code_boundary(&mut self) -> usize {
266        let mut candidate = self.inner.pos();
267        while let Some(prev) = self.inner.prev_codepoint() {
268            let prop_prev = get_char_property(prev);
269            if prop_prev != CharClassification::Other {
270                break;
271            }
272            candidate = self.inner.pos();
273        }
274        candidate
275    }
276
277    /// Get the first matching [`CharClassification::Other`] forward and set the cursor position to this location .
278    /// **Example:**
279    ///
280    /// ```rust
281    /// # use floem_editor_core::word::WordCursor;
282    /// # use lapce_xi_rope::Rope;
283    /// let text = "violet, are\n blue";
284    /// let rope = Rope::from(text);
285    /// let mut cursor = WordCursor::new(&rope, 11);
286    /// let position = cursor.next_code_boundary();
287    /// assert_eq!(&text[position..], "\n blue");
288    ///```
289    pub fn next_code_boundary(&mut self) -> usize {
290        let mut candidate = self.inner.pos();
291        while let Some(prev) = self.inner.next_codepoint() {
292            let prop_prev = get_char_property(prev);
293            if prop_prev != CharClassification::Other {
294                break;
295            }
296            candidate = self.inner.pos();
297        }
298        candidate
299    }
300
301    /// Looks for a matching pair character, either forward for opening chars (ex: `(`) or
302    /// backward for closing char (ex: `}`), and return the matched character position if found.
303    /// Will return `None` if the character under cursor is not matchable (see [`crate::util::matching_char`]).
304    ///
305    /// **Example:**
306    ///
307    /// ```rust
308    /// # use floem_editor_core::word::WordCursor;
309    /// # use lapce_xi_rope::Rope;
310    /// let text = "{ }";
311    /// let rope = Rope::from(text);
312    /// let mut cursor = WordCursor::new(&rope, 2);
313    /// let position = cursor.match_pairs();
314    /// assert_eq!(position, Some(0));
315    ///```
316    pub fn match_pairs(&mut self) -> Option<usize> {
317        let c = self.inner.peek_next_codepoint()?;
318        let other = matching_char(c)?;
319        let left = matching_pair_direction(other)?;
320        if left {
321            self.previous_unmatched(other)
322        } else {
323            self.inner.next_codepoint();
324            let offset = self.next_unmatched(other)?;
325            Some(offset - 1)
326        }
327    }
328
329    /// Take a matchable character and look cforward for the first unmatched one
330    /// ignoring the encountered matched pairs.
331    ///
332    /// **Example**:
333    /// ```rust
334    /// # use lapce_xi_rope::Rope;
335    /// # use floem_editor_core::word::WordCursor;
336    /// let rope = Rope::from("outer {inner}} world");
337    /// let mut cursor = WordCursor::new(&rope, 0);
338    /// let position = cursor.next_unmatched('}');
339    /// assert_eq!(position, Some(14));
340    ///  ```
341    pub fn next_unmatched(&mut self, c: char) -> Option<usize> {
342        let other = matching_char(c)?;
343        let mut n = 0;
344        while let Some(current) = self.inner.next_codepoint() {
345            if current == c && n == 0 {
346                return Some(self.inner.pos());
347            }
348            if current == other {
349                n += 1;
350            } else if current == c {
351                n -= 1;
352            }
353        }
354        None
355    }
356
357    /// Take a matchable character and look backward for the first unmatched one
358    /// ignoring the encountered matched pairs.
359    ///
360    /// **Example**:
361    ///
362    /// ```rust
363    /// # use lapce_xi_rope::Rope;
364    /// # use floem_editor_core::word::WordCursor;
365    /// let rope = Rope::from("outer {{inner} world");
366    /// let mut cursor = WordCursor::new(&rope, 15);
367    /// let position = cursor.previous_unmatched('{');
368    /// assert_eq!(position, Some(6));
369    ///  ```
370    pub fn previous_unmatched(&mut self, c: char) -> Option<usize> {
371        let other = matching_char(c)?;
372        let mut n = 0;
373        while let Some(current) = self.inner.prev_codepoint() {
374            if current == c && n == 0 {
375                return Some(self.inner.pos());
376            }
377            if current == other {
378                n += 1;
379            } else if current == c {
380                n -= 1;
381            }
382        }
383        None
384    }
385
386    /// Return the previous and end boundaries of the word under cursor.
387    ///
388    /// **Example**:
389    ///
390    ///```rust
391    /// # use floem_editor_core::word::WordCursor;
392    /// # use lapce_xi_rope::Rope;
393    /// let text = "violet are blue";
394    /// let rope = Rope::from(text);
395    /// let mut cursor = WordCursor::new(&rope, 9);
396    /// let (start, end) = cursor.select_word();
397    /// assert_eq!(&text[start..end], "are");
398    ///```
399    pub fn select_word(&mut self) -> (usize, usize) {
400        let initial = self.inner.pos();
401        let end = self.next_code_boundary();
402        self.inner.set(initial);
403        let start = self.prev_code_boundary();
404        (start, end)
405    }
406
407    /// Return the enclosing brackets of the current position
408    ///
409    /// **Example**:
410    ///
411    ///```rust
412    /// # use floem_editor_core::word::WordCursor;
413    /// # use lapce_xi_rope::Rope;
414    /// let text = "outer {{inner} world";
415    /// let rope = Rope::from(text);
416    /// let mut cursor = WordCursor::new(&rope, 10);
417    /// let (start, end) = cursor.find_enclosing_pair().unwrap();
418    /// assert_eq!(start, 7);
419    /// assert_eq!(end, 13)
420    ///```
421    pub fn find_enclosing_pair(&mut self) -> Option<(usize, usize)> {
422        let old_offset = self.inner.pos();
423        while let Some(c) = self.inner.prev_codepoint() {
424            if matching_pair_direction(c) == Some(true) {
425                let opening_bracket_offset = self.inner.pos();
426                if let Some(closing_bracket_offset) = self.match_pairs() {
427                    if (opening_bracket_offset..=closing_bracket_offset).contains(&old_offset) {
428                        return Some((opening_bracket_offset, closing_bracket_offset));
429                    } else {
430                        self.inner.set(opening_bracket_offset);
431                    }
432                }
433            }
434        }
435        None
436    }
437}
438
439/// Return the [`CharClassification`] of the input character
440pub fn get_char_property(codepoint: char) -> CharClassification {
441    if codepoint <= ' ' {
442        if codepoint == '\r' {
443            return CharClassification::Cr;
444        }
445        if codepoint == '\n' {
446            return CharClassification::Lf;
447        }
448        return CharClassification::Space;
449    } else if codepoint <= '\u{3f}' {
450        if (0xfc00fffe00000000u64 >> (codepoint as u32)) & 1 != 0 {
451            return CharClassification::Punctuation;
452        }
453    } else if codepoint <= '\u{7f}' {
454        // Hardcoded: @[\]^`{|}~
455        if (0x7800000178000001u64 >> ((codepoint as u32) & 0x3f)) & 1 != 0 {
456            return CharClassification::Punctuation;
457        }
458    }
459    CharClassification::Other
460}
461
462fn classify_boundary(prev: CharClassification, next: CharClassification) -> WordBoundary {
463    use self::{CharClassification::*, WordBoundary::*};
464    match (prev, next) {
465        (Lf, Lf) => Start,
466        (Lf, Space) => Interior,
467        (Cr, Lf) => Interior,
468        (Space, Lf) => Interior,
469        (Space, Cr) => Interior,
470        (Space, Space) => Interior,
471        (_, Space) => End,
472        (Space, _) => Start,
473        (Lf, _) => Start,
474        (_, Cr) => End,
475        (_, Lf) => End,
476        (Punctuation, Other) => Both,
477        (Other, Punctuation) => Both,
478        _ => Interior,
479    }
480}
481
/// Unit tests for [`WordCursor`] navigation and for character classification.
#[cfg(test)]
mod test {
    use lapce_xi_rope::Rope;

    use super::{get_char_property, CharClassification, WordCursor};
    use crate::mode::Mode;

    #[test]
    fn prev_boundary_should_be_none_at_position_zero() {
        let rope = Rope::from("Hello world");
        assert!(WordCursor::new(&rope, 0)
            .prev_boundary(Mode::Insert)
            .is_none());
    }

    #[test]
    fn prev_boundary_should_be_zero_when_cursor_on_first_word() {
        let rope = Rope::from("Hello world");
        assert_eq!(
            WordCursor::new(&rope, 4).prev_boundary(Mode::Insert),
            Some(0)
        );
    }

    #[test]
    fn prev_boundary_should_be_at_word_start() {
        let rope = Rope::from("Hello world");
        assert_eq!(
            WordCursor::new(&rope, 9).prev_boundary(Mode::Insert),
            Some(6)
        );
    }

    #[test]
    fn on_whitespace_prev_boundary_should_be_at_line_start_for_non_modal() {
        let rope = Rope::from("Hello\n    world");
        assert_eq!(
            WordCursor::new(&rope, 10).prev_boundary(Mode::Insert),
            Some(6)
        );
    }

    #[test]
    fn on_whitespace_prev_boundary_should_cross_line_for_modal() {
        let rope = Rope::from("Hello\n    world");
        assert_eq!(
            WordCursor::new(&rope, 10).prev_boundary(Mode::Normal),
            Some(0)
        );
    }

    #[test]
    fn should_get_next_word_boundary() {
        let rope = Rope::from("Hello world");
        assert_eq!(WordCursor::new(&rope, 0).next_boundary(), Some(6));
    }

    #[test]
    fn next_word_boundary_should_be_none_at_last_position() {
        let rope = Rope::from("Hello world");
        assert_eq!(WordCursor::new(&rope, 11).next_boundary(), None);
    }

    #[test]
    fn should_get_previous_code_boundary() {
        let text = "violet, are\n blue";
        let rope = Rope::from(text);
        let position = WordCursor::new(&rope, 11).prev_code_boundary();
        assert_eq!(&text[position..], "are\n blue");
    }

    #[test]
    fn should_get_next_code_boundary() {
        let text = "violet, are\n blue";
        let rope = Rope::from(text);
        let position = WordCursor::new(&rope, 11).next_code_boundary();
        assert_eq!(&text[position..], "\n blue");
    }

    #[test]
    fn get_next_non_blank_char_should_skip_whitespace() {
        let rope = Rope::from("Hello world");
        assert_eq!(WordCursor::new(&rope, 5).next_non_blank_char(), 6);
    }

    #[test]
    fn get_next_non_blank_char_should_return_current_position_on_non_blank_char() {
        let rope = Rope::from("Hello world");
        assert_eq!(WordCursor::new(&rope, 3).next_non_blank_char(), 3);
    }

    #[test]
    fn should_get_end_boundary() {
        let rope = Rope::from("Hello world");
        assert_eq!(WordCursor::new(&rope, 3).end_boundary(), Some(5));
    }

    #[test]
    fn end_boundary_should_be_none_at_last_position() {
        let rope = Rope::from("Hello world");
        assert_eq!(WordCursor::new(&rope, 11).end_boundary(), None);
    }

    #[test]
    fn should_get_next_unmatched_char() {
        let rope = Rope::from("hello { world");
        assert_eq!(WordCursor::new(&rope, 0).next_unmatched('{'), Some(7));
    }

    #[test]
    fn should_get_next_unmatched_char_with_matched_chars() {
        let rope = Rope::from("hello {} world }");
        assert_eq!(WordCursor::new(&rope, 0).next_unmatched('}'), Some(16));
    }

    #[test]
    fn should_get_previous_unmatched_char() {
        let rope = Rope::from("hello { world");
        assert_eq!(WordCursor::new(&rope, 12).previous_unmatched('{'), Some(6));
    }

    #[test]
    fn should_get_previous_unmatched_char_with_inner_matched_chars() {
        let rope = Rope::from("{hello {} world");
        assert_eq!(WordCursor::new(&rope, 10).previous_unmatched('{'), Some(0));
    }

    #[test]
    fn should_match_pair_forward() {
        let rope = Rope::from("{ }");
        assert_eq!(WordCursor::new(&rope, 0).match_pairs(), Some(2));
    }

    #[test]
    fn should_match_pair_backward() {
        let rope = Rope::from("{ }");
        assert_eq!(WordCursor::new(&rope, 2).match_pairs(), Some(0));
    }

    #[test]
    fn match_pair_should_be_none() {
        let rope = Rope::from("{ }");
        assert_eq!(WordCursor::new(&rope, 1).match_pairs(), None);
    }

    #[test]
    fn select_word_should_return_word_boundaries() {
        let text = "violet are blue";
        let rope = Rope::from(text);
        let (start, end) = WordCursor::new(&rope, 9).select_word();
        assert_eq!(&text[start..end], "are");
    }

    #[test]
    fn should_get_deletion_boundary_backward() {
        let text = "violet are blue";
        let rope = Rope::from(text);
        let position = WordCursor::new(&rope, 9).prev_deletion_boundary();

        assert_eq!(position, Some(7));
        assert_eq!(&text[..position.unwrap()], "violet ");
    }

    #[test]
    fn find_pair_should_return_positions() {
        let rope = Rope::from("violet (are) blue");
        assert_eq!(
            WordCursor::new(&rope, 9).find_enclosing_pair(),
            Some((7, 11))
        );
    }

    #[test]
    fn find_pair_should_return_next_pair() {
        let rope = Rope::from("violets {are (blue)    }");

        // Inside the outer pair only
        assert_eq!(
            WordCursor::new(&rope, 11).find_enclosing_pair(),
            Some((8, 23))
        );
        // After the inner pair, still inside the outer one
        assert_eq!(
            WordCursor::new(&rope, 20).find_enclosing_pair(),
            Some((8, 23))
        );
        // On the closing bracket of the inner pair
        assert_eq!(
            WordCursor::new(&rope, 18).find_enclosing_pair(),
            Some((13, 18))
        );
    }

    #[test]
    fn find_pair_should_return_none() {
        let rope = Rope::from("violet (are) blue");
        assert_eq!(WordCursor::new(&rope, 1).find_enclosing_pair(), None);
    }

    #[test]
    fn char_property_should_classify_characters() {
        // `CharClassification` is compared with `==` so the test does not rely on `Debug`
        assert!(get_char_property('\r') == CharClassification::Cr);
        assert!(get_char_property('\n') == CharClassification::Lf);
        assert!(get_char_property(' ') == CharClassification::Space);
        assert!(get_char_property('\t') == CharClassification::Space);
        assert!(get_char_property('.') == CharClassification::Punctuation);
        assert!(get_char_property('{') == CharClassification::Punctuation);
        assert!(get_char_property('_') == CharClassification::Other);
        assert!(get_char_property('7') == CharClassification::Other);
        assert!(get_char_property('a') == CharClassification::Other);
        assert!(get_char_property('é') == CharClassification::Other);
    }
}