floem_editor_core/
indent.rs

1use lapce_xi_rope::Rope;
2
3use crate::{
4    buffer::{rope_text::RopeText, Buffer},
5    chars::{char_is_line_ending, char_is_whitespace},
6    selection::Selection,
7};
8
/// The unit of indentation used by a document: tabs, or a fixed number of
/// spaces.
///
/// The `Spaces` payload is only valid for widths from 1 to 8, although the
/// type itself can represent any `u8`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum IndentStyle {
    /// Indent with tab characters.
    Tabs,
    /// Indent with the given number of space characters (valid range: 1-8).
    Spaces(u8),
}
17
18impl std::fmt::Display for IndentStyle {
19    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
20        match self {
21            IndentStyle::Tabs => f.write_str("Tabs"),
22            IndentStyle::Spaces(spaces) => f.write_fmt(format_args!("{spaces} spaces")),
23        }
24    }
25}
26
27impl IndentStyle {
28    pub const LONGEST_INDENT: &'static str = "        "; // 8 spaces
29    pub const DEFAULT_INDENT: IndentStyle = IndentStyle::Spaces(4);
30
31    /// Creates an `IndentStyle` from an indentation string.
32    ///
33    /// For example, passing `"    "` (four spaces) will create `IndentStyle::Spaces(4)`.
34    #[allow(clippy::should_implement_trait)]
35    #[inline]
36    pub fn from_str(indent: &str) -> Self {
37        debug_assert!(!indent.is_empty() && indent.len() <= Self::LONGEST_INDENT.len());
38        if indent.starts_with(' ') {
39            IndentStyle::Spaces(indent.len() as u8)
40        } else {
41            IndentStyle::Tabs
42        }
43    }
44
45    #[inline]
46    pub fn as_str(&self) -> &'static str {
47        match *self {
48            IndentStyle::Tabs => "\t",
49            IndentStyle::Spaces(x) if x <= Self::LONGEST_INDENT.len() as u8 => {
50                Self::LONGEST_INDENT.split_at(x.into()).0
51            }
52            // Unsupported indentation style.  This should never happen,
53            // but just in case fall back to the default of 4 spaces
54            IndentStyle::Spaces(n) => {
55                debug_assert!(n > 0 && n <= Self::LONGEST_INDENT.len() as u8);
56                "    "
57            }
58        }
59    }
60}
61
62pub fn create_edit<'s>(buffer: &Buffer, offset: usize, indent: &'s str) -> (Selection, &'s str) {
63    let indent = if indent.starts_with('\t') {
64        indent
65    } else {
66        let (_, col) = buffer.offset_to_line_col(offset);
67        indent.split_at(indent.len() - col % indent.len()).0
68    };
69    (Selection::caret(offset), indent)
70}
71
72pub fn create_outdent<'s>(
73    buffer: &Buffer,
74    offset: usize,
75    indent: &'s str,
76) -> Option<(Selection, &'s str)> {
77    let (_, col) = buffer.offset_to_line_col(offset);
78    if col == 0 {
79        return None;
80    }
81
82    let start = if indent.starts_with('\t') {
83        offset - 1
84    } else {
85        let r = col % indent.len();
86        let r = if r == 0 { indent.len() } else { r };
87        offset - r
88    };
89
90    Some((Selection::region(start, offset), ""))
91}
92
93/// Attempts to detect the indentation style used in a document.
94///
95/// Returns the indentation style if the auto-detect confidence is
96/// reasonably high, otherwise returns `None`.
97pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> {
98    // Build a histogram of the indentation *increases* between
99    // subsequent lines, ignoring lines that are all whitespace.
100    //
101    // Index 0 is for tabs, the rest are 1-8 spaces.
102    let histogram: [usize; 9] = {
103        let mut histogram = [0; 9];
104        let mut prev_line_is_tabs = false;
105        let mut prev_line_leading_count = 0usize;
106
107        // Loop through the lines, checking for and recording indentation
108        // increases as we go.
109        let offset = document_text
110            .offset_of_line(document_text.line_of_offset(document_text.len()).min(1000));
111        'outer: for line in document_text.lines(..offset) {
112            let mut c_iter = line.chars();
113
114            // Is first character a tab or space?
115            let is_tabs = match c_iter.next() {
116                Some('\t') => true,
117                Some(' ') => false,
118
119                // Ignore blank lines.
120                Some(c) if char_is_line_ending(c) => continue,
121
122                _ => {
123                    prev_line_is_tabs = false;
124                    prev_line_leading_count = 0;
125                    continue;
126                }
127            };
128
129            // Count the line's total leading tab/space characters.
130            let mut leading_count = 1;
131            let mut count_is_done = false;
132            for c in c_iter {
133                match c {
134                    '\t' if is_tabs && !count_is_done => leading_count += 1,
135                    ' ' if !is_tabs && !count_is_done => leading_count += 1,
136
137                    // We stop counting if we hit whitespace that doesn't
138                    // qualify as indent or doesn't match the leading
139                    // whitespace, but we don't exit the loop yet because
140                    // we still want to determine if the line is blank.
141                    c if char_is_whitespace(c) => count_is_done = true,
142
143                    // Ignore blank lines.
144                    c if char_is_line_ending(c) => continue 'outer,
145
146                    _ => break,
147                }
148
149                // Bound the worst-case execution time for weird text files.
150                if leading_count > 256 {
151                    continue 'outer;
152                }
153            }
154
155            // If there was an increase in indentation over the previous
156            // line, update the histogram with that increase.
157            if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
158                && prev_line_leading_count < leading_count
159            {
160                if is_tabs {
161                    histogram[0] += 1;
162                } else {
163                    let amount = leading_count - prev_line_leading_count;
164                    if amount <= 8 {
165                        histogram[amount] += 1;
166                    }
167                }
168            }
169
170            // Store this line's leading whitespace info for use with
171            // the next line.
172            prev_line_is_tabs = is_tabs;
173            prev_line_leading_count = leading_count;
174        }
175
176        // Give more weight to tabs, because their presence is a very
177        // strong indicator.
178        histogram[0] *= 2;
179
180        histogram
181    };
182
183    // Find the most frequent indent, its frequency, and the frequency of
184    // the next-most frequent indent.
185    let indent = histogram
186        .iter()
187        .enumerate()
188        .max_by_key(|kv| kv.1)
189        .unwrap()
190        .0;
191    let indent_freq = histogram[indent];
192    let indent_freq_2 = *histogram
193        .iter()
194        .enumerate()
195        .filter(|kv| kv.0 != indent)
196        .map(|kv| kv.1)
197        .max()
198        .unwrap();
199
200    // Return the auto-detected result if we're confident enough in its
201    // accuracy, based on some heuristics.
202    if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
203        Some(match indent {
204            0 => IndentStyle::Tabs,
205            _ => IndentStyle::Spaces(indent as u8),
206        })
207    } else {
208        None
209    }
210}