haar_lib/algo/
sa.rs

1//! Suffix Array
2use std::collections::VecDeque;
3
4#[derive(Clone, Copy, Eq, PartialEq, Debug)]
5enum LS {
6    L,
7    S,
8}
9
10fn sa(mut s: Vec<u32>) -> Vec<usize> {
11    use self::LS::{L, S};
12
13    let n = s.len();
14
15    if n == 0 {
16        return vec![0];
17    }
18    if n == 1 {
19        return vec![1, 0];
20    }
21
22    s.push(0);
23
24    let mut ls = vec![S; n + 1];
25    for i in (0..n).rev() {
26        use std::cmp::Ordering::*;
27        match s[i].cmp(&s[i + 1]) {
28            Less => ls[i] = S,
29            Greater => ls[i] = L,
30            Equal => ls[i] = ls[i + 1],
31        }
32    }
33
34    let bucket_count = *s.iter().max().unwrap() as usize;
35    let mut bucket_size = vec![0; bucket_count + 1];
36    for &x in &s {
37        bucket_size[x as usize] += 1;
38    }
39
40    let induced_sort = |lms: &[usize]| -> Vec<usize> {
41        let mut bucket = vec![None; n + 1];
42        let mut is_lms = vec![false; n + 1];
43        let mut empty = vec![VecDeque::new(); bucket_count + 1];
44
45        let mut k = 0;
46        for i in 0..=bucket_count {
47            for _ in 0..bucket_size[i] {
48                empty[i].push_back(k);
49                k += 1;
50            }
51        }
52
53        for &x in lms.iter().rev() {
54            let i = empty[s[x] as usize].pop_back().unwrap();
55            bucket[i] = Some(x);
56            is_lms[i] = true;
57        }
58
59        for i in 0..=n {
60            match bucket[i] {
61                Some(b) if b >= 1 && ls[b - 1] == L => {
62                    let j = empty[s[b - 1] as usize].pop_front().unwrap();
63                    bucket[j] = Some(b - 1);
64                }
65                _ => {}
66            }
67        }
68
69        for i in 0..=n {
70            if is_lms[i] {
71                bucket[i] = None;
72            }
73        }
74
75        let mut k = 0;
76        for i in 0..=bucket_count {
77            empty[i].clear();
78
79            for _ in 0..bucket_size[i] {
80                empty[i].push_back(k);
81                k += 1;
82            }
83        }
84
85        for i in (0..=n).rev() {
86            match bucket[i] {
87                Some(b) if b >= 1 && ls[b - 1] == S => {
88                    let j = empty[s[b - 1] as usize].pop_back().unwrap();
89                    bucket[j] = Some(b - 1);
90                }
91                _ => {}
92            }
93        }
94
95        bucket[0] = Some(n);
96        bucket.into_iter().map(|x| x.unwrap()).collect()
97    };
98
99    let lms: Vec<_> = (1..=n).filter(|&i| ls[i] == S && ls[i - 1] == L).collect();
100
101    let mut lms_bucket_length = vec![1; n + 1];
102    for i in 0..lms.len() - 1 {
103        lms_bucket_length[lms[i]] = lms[i + 1] - lms[i] + 1;
104    }
105
106    let lms_substr_sorted: Vec<_> = induced_sort(&lms)
107        .into_iter()
108        .filter(|&i| i > 0 && ls[i - 1] == L && ls[i] == S)
109        .collect();
110
111    let mut rank = vec![0; n + 1];
112    rank[lms_substr_sorted[0]] = 1;
113
114    let mut k = 1;
115    for i in 1..lms_substr_sorted.len() {
116        let x = lms_substr_sorted[i - 1];
117        let y = lms_substr_sorted[i];
118        let eq = (|| {
119            if lms_bucket_length[x] != lms_bucket_length[y] {
120                false
121            } else {
122                for j in 0..lms_bucket_length[x] {
123                    if s[x + j] != s[y + j] {
124                        return false;
125                    }
126                }
127                true
128            }
129        })();
130
131        if !eq {
132            k += 1;
133        }
134        rank[y] = k;
135    }
136
137    let t: Vec<_> = (0..=n).map(|i| rank[i]).filter(|&x| x != 0).collect();
138    let lms_sorted: Vec<_> = sa(t).into_iter().skip(1).map(|i| lms[i]).collect();
139
140    induced_sort(&lms_sorted)
141}
142
143/// 接尾辞配列
144#[derive(Clone, Debug)]
145pub struct SuffixArray {
146    data: Vec<usize>,
147    str_data: Vec<u8>,
148}
149
150impl SuffixArray {
151    /// 文字列`s`から接尾辞配列を構築する。
152    pub fn new(s: &str) -> Self {
153        let mut str_data = s.as_bytes().to_vec();
154        str_data.push(0);
155
156        let s_ = s.as_bytes().iter().map(|&x| x as u32).collect();
157        Self {
158            data: sa(s_),
159            str_data,
160        }
161    }
162
163    /// 接尾辞配列への参照を返す。
164    pub fn as_slice(&self) -> &[usize] {
165        &self.data
166    }
167
168    /// 接尾辞配列`s`について、`s[i]`と`s[i-1]`最長共通接頭辞の長さを格納した配列を返す。
169    pub fn lcp_array(&self) -> Vec<usize> {
170        let n = self.data.len();
171        let mut rank = vec![0; n];
172        let mut ret = vec![0; n];
173
174        for i in 0..n {
175            rank[self.data[i]] = i;
176        }
177
178        let mut h: usize = 0;
179        for i in 0..n {
180            if rank[i] == 0 {
181                continue;
182            }
183
184            h = h.saturating_sub(1);
185            let j = self.data[rank[i] - 1];
186            while j + h < n && i + h < n {
187                if self.str_data[j + h] != self.str_data[i + h] {
188                    break;
189                }
190                h += 1;
191            }
192
193            ret[rank[i]] = h;
194        }
195
196        ret
197    }
198}
199
200impl std::ops::Index<usize> for SuffixArray {
201    type Output = usize;
202    fn index(&self, index: usize) -> &Self::Output {
203        &self.data[index]
204    }
205}
206
207#[cfg(test)]
208mod tests {
209    use super::*;
210
211    #[test]
212    fn test() {
213        let s = "abracadabra";
214        let sa = SuffixArray::new(s);
215        assert_eq!(sa.as_slice(), &[11, 10, 7, 0, 3, 5, 8, 1, 4, 6, 9, 2]);
216
217        assert_eq!(sa.lcp_array(), vec![0, 0, 1, 4, 1, 1, 0, 3, 0, 0, 0, 2]);
218    }
219}