Skip to main content

competitive/string/
suffix_tree.rs

1use super::SuffixArray;
2use std::ops::Range;
3
4#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
5pub enum Delimited<T> {
6    Separator(usize),
7    Value(T),
8}
9
10#[derive(Debug)]
11pub struct STNode {
12    pub parent: usize,
13    pub depth: usize,
14    pub sa_range: Range<usize>,
15    pub child_index: Range<usize>,
16    pub head: usize,
17    pub pos: usize,
18}
19
20pub struct SuffixTree<T>
21where
22    T: Ord,
23{
24    text: Vec<T>,
25    suffix_array: Vec<usize>,
26    lcp_array: Vec<usize>,
27    rank: Vec<usize>,
28    nodes: Vec<STNode>,
29    children: Vec<usize>,
30    leafs: Vec<usize>,
31    inv: Vec<usize>,
32}
33
34struct Link {
35    first_child: usize,
36    last_child: usize,
37    next_sibling: usize,
38    prev_sibling: usize,
39}
40
41struct SuffixTreeBuilder {
42    nodes: Vec<STNode>,
43    links: Vec<Link>,
44}
45
46impl SuffixTreeBuilder {
47    fn with_capacity(capacity: usize) -> Self {
48        Self {
49            nodes: Vec::with_capacity(capacity),
50            links: Vec::with_capacity(capacity),
51        }
52    }
53
54    fn push_node(&mut self, depth: usize, start: usize) -> usize {
55        let node_id = self.nodes.len();
56        self.nodes.push(STNode {
57            parent: !0,
58            depth,
59            sa_range: start..!0,
60            child_index: 0..0,
61            head: !0,
62            pos: !0,
63        });
64        self.links.push(Link {
65            first_child: !0,
66            last_child: !0,
67            next_sibling: !0,
68            prev_sibling: !0,
69        });
70        node_id
71    }
72
73    fn attach_child(&mut self, parent: usize, child: usize) {
74        self.nodes[child].parent = parent;
75        let tail = self.links[parent].last_child;
76        if tail == !0 {
77            self.links[parent].first_child = child;
78        } else {
79            self.links[tail].next_sibling = child;
80            self.links[child].prev_sibling = tail;
81        }
82        self.links[parent].last_child = child;
83    }
84
85    fn detach_last_child(&mut self, parent: usize, child: usize) {
86        let prev = self.links[child].prev_sibling;
87        if prev == !0 {
88            self.links[parent].first_child = !0;
89        } else {
90            self.links[prev].next_sibling = !0;
91        }
92        self.links[parent].last_child = prev;
93        self.links[child].prev_sibling = !0;
94        self.links[child].next_sibling = !0;
95    }
96
97    fn reindex(&mut self, leafs: &mut [usize]) {
98        let mut new_index = vec![!0; self.nodes.len()];
99        let mut stack = vec![0];
100        let mut index = 0;
101        while let Some(node_id) = stack.pop() {
102            new_index[node_id] = index;
103            index += 1;
104            let mut child = self.links[node_id].last_child;
105            while child != !0 {
106                stack.push(child);
107                child = self.links[child].prev_sibling;
108            }
109        }
110
111        for node in &mut self.nodes {
112            if node.parent != !0 {
113                node.parent = new_index[node.parent];
114            }
115        }
116        for link in &mut self.links {
117            if link.first_child != !0 {
118                link.first_child = new_index[link.first_child];
119            }
120            if link.last_child != !0 {
121                link.last_child = new_index[link.last_child];
122            }
123            if link.next_sibling != !0 {
124                link.next_sibling = new_index[link.next_sibling];
125            }
126            if link.prev_sibling != !0 {
127                link.prev_sibling = new_index[link.prev_sibling];
128            }
129        }
130        for leaf in leafs {
131            *leaf = new_index[*leaf];
132        }
133
134        for i in 0..self.nodes.len() {
135            while new_index[i] != i {
136                let j = new_index[i];
137                self.nodes.swap(i, j);
138                self.links.swap(i, j);
139                new_index.swap(i, j);
140            }
141        }
142    }
143
144    fn build_aux(&mut self, children: &[usize]) -> Vec<usize> {
145        let n = self.nodes.len();
146        let mut sub_size = vec![1usize; n];
147        let mut heavy = vec![!0usize; n];
148        for node in (0..n).rev() {
149            let mut max_size = 0usize;
150            for &child in &children[self.nodes[node].child_index.clone()] {
151                sub_size[node] += sub_size[child];
152                if sub_size[child] > max_size {
153                    max_size = sub_size[child];
154                    heavy[node] = child;
155                }
156            }
157        }
158
159        let mut inv = vec![0usize; n];
160        let mut cur = 0usize;
161        let mut stack = vec![(0usize, 0usize)];
162        while let Some((v, h)) = stack.pop() {
163            let mut u = v;
164            while u != !0 {
165                self.nodes[u].head = h;
166                self.nodes[u].pos = cur;
167                inv[cur] = u;
168                cur += 1;
169                for &child in &children[self.nodes[u].child_index.clone()] {
170                    if child != heavy[u] {
171                        stack.push((child, child));
172                    }
173                }
174                u = heavy[u];
175            }
176        }
177        inv
178    }
179
180    fn build_children(&mut self) -> Vec<usize> {
181        let mut children = vec![!0; self.nodes.len() - 1];
182        let mut index = 0;
183        for node_id in 0..self.nodes.len() {
184            let mut child = self.links[node_id].first_child;
185            self.nodes[node_id].child_index.start = index;
186            while child != !0 {
187                children[index] = child;
188                index += 1;
189                child = self.links[child].next_sibling;
190            }
191            self.nodes[node_id].child_index.end = index;
192        }
193        children
194    }
195}
196
197impl<T> SuffixTree<T>
198where
199    T: Ord,
200{
201    pub fn new(text: Vec<T>) -> Self {
202        let n = text.len();
203        let suffix_array = SuffixArray::new(&text);
204        let (lcp_array, rank) = suffix_array.lcp_array_with_rank(&text);
205        let mut builder = SuffixTreeBuilder::with_capacity(n * 2);
206        builder.push_node(0, 0);
207        let mut leafs = vec![!0; n + 1];
208        leafs[0] = 0;
209        let mut stack = Vec::with_capacity(n);
210        stack.push(0);
211        for i in 1..=n {
212            let lcp = lcp_array[i - 1];
213            let mut last_popped = !0;
214            while builder.nodes[stack.last().cloned().unwrap()].depth > lcp {
215                last_popped = stack.pop().unwrap();
216                builder.nodes[last_popped].sa_range.end = i;
217            }
218
219            // internal node
220            if builder.nodes[stack.last().cloned().unwrap()].depth < lcp {
221                let parent = stack.last().cloned().unwrap();
222                let internal = builder.nodes.len();
223                builder.push_node(lcp, builder.nodes[last_popped].sa_range.start);
224                builder.detach_last_child(parent, last_popped);
225                builder.attach_child(parent, internal);
226                builder.attach_child(internal, last_popped);
227                stack.push(internal);
228            }
229
230            // leaf node
231            let parent = stack.last().cloned().unwrap();
232            let leaf = builder.nodes.len();
233            builder.push_node(n - suffix_array[i], i);
234            builder.attach_child(parent, leaf);
235            stack.push(leaf);
236            leafs[i] = leaf;
237        }
238        while let Some(node) = stack.pop() {
239            builder.nodes[node].sa_range.end = n + 1;
240        }
241
242        builder.reindex(&mut leafs[1..]);
243        let children = builder.build_children();
244        let inv = builder.build_aux(&children);
245        let nodes = builder.nodes;
246        let suffix_array = suffix_array.into_inner();
247
248        Self {
249            text,
250            suffix_array,
251            lcp_array,
252            rank,
253            nodes,
254            children,
255            leafs,
256            inv,
257        }
258    }
259
260    pub fn text(&self) -> &[T] {
261        &self.text
262    }
263
264    pub fn suffix_array(&self) -> &[usize] {
265        &self.suffix_array
266    }
267
268    pub fn lcp_array(&self) -> &[usize] {
269        &self.lcp_array
270    }
271
272    pub fn rank(&self) -> &[usize] {
273        &self.rank
274    }
275
276    pub fn node_size(&self) -> usize {
277        self.nodes.len()
278    }
279
280    pub fn node(&self, node_id: usize) -> &STNode {
281        &self.nodes[node_id]
282    }
283
284    pub fn children(&self, node_id: usize) -> &[usize] {
285        &self.children[self.nodes[node_id].child_index.clone()]
286    }
287
288    pub fn depth_range(&self, node_id: usize) -> Range<usize> {
289        let node = &self.nodes[node_id];
290        if node.parent == !0 {
291            0..0
292        } else {
293            self.nodes[node.parent].depth + 1..node.depth + 1
294        }
295    }
296
297    pub fn kth_substrings(&self) -> KthSubstrings<'_, T> {
298        KthSubstrings::new(self)
299    }
300
301    fn node_covering_depth(&self, leaf: usize, depth: usize) -> usize {
302        debug_assert!(depth > 0);
303        debug_assert!(depth <= self.nodes[leaf].depth);
304        let mut v = leaf;
305        loop {
306            let h = self.nodes[v].head;
307            let parent = self.nodes[h].parent;
308            let parent_depth = if parent == !0 {
309                0
310            } else {
311                self.nodes[parent].depth
312            };
313            if parent_depth < depth {
314                let mut l = self.nodes[h].pos;
315                let mut r = self.nodes[v].pos;
316                while l < r {
317                    let m = (l + r) >> 1;
318                    let node_id = self.inv[m];
319                    if self.nodes[node_id].depth < depth {
320                        l = m + 1;
321                    } else {
322                        r = m;
323                    }
324                }
325                return self.inv[l];
326            }
327            v = parent;
328        }
329    }
330}
331
332pub struct KthSubstrings<'a, T>
333where
334    T: Ord,
335{
336    tree: &'a SuffixTree<T>,
337    prefix_distinct: Vec<u64>,
338    prefix_total: Vec<u64>,
339}
340
341impl<'a, T> KthSubstrings<'a, T>
342where
343    T: Ord,
344{
345    fn new(tree: &'a SuffixTree<T>) -> Self {
346        let n = tree.nodes.len();
347        let mut prefix_distinct = vec![0u64; n];
348        let mut prefix_total = vec![0u64; n];
349        for i in 1..n {
350            let node = &tree.nodes[i];
351            let parent_depth = tree.nodes[node.parent].depth;
352            let distinct = (node.depth - parent_depth) as u64;
353            let total = distinct * node.sa_range.len() as u64;
354            prefix_distinct[i] = prefix_distinct[i - 1] + distinct;
355            prefix_total[i] = prefix_total[i - 1] + total;
356        }
357        Self {
358            tree,
359            prefix_distinct,
360            prefix_total,
361        }
362    }
363
364    pub fn kth_distinct_substring(&self, k: u64) -> Option<Range<usize>> {
365        let idx = self.prefix_distinct.partition_point(|&x| x <= k);
366        if idx == self.prefix_distinct.len() {
367            return None;
368        }
369        let node = &self.tree.nodes[idx];
370        let offset = k - self.prefix_distinct[idx - 1];
371        let len = self.tree.nodes[node.parent].depth + 1 + offset as usize;
372        let start = self.tree.suffix_array[node.sa_range.start];
373        Some(start..start + len)
374    }
375
376    pub fn kth_substring(&self, k: u64) -> Option<Range<usize>> {
377        let idx = self.prefix_total.partition_point(|&x| x <= k);
378        if idx == self.prefix_total.len() {
379            return None;
380        }
381        let node = &self.tree.nodes[idx];
382        let offset = k - self.prefix_total[idx - 1];
383        let occ = node.sa_range.len() as u64;
384        let len = self.tree.nodes[node.parent].depth + 1 + (offset / occ) as usize;
385        let start = self.tree.suffix_array[node.sa_range.start + (offset % occ) as usize];
386        Some(start..start + len)
387    }
388
389    pub fn index_of_distinct_substring(&self, range: Range<usize>) -> u64 {
390        debug_assert!(range.start < range.end && range.end <= self.tree.text.len());
391        let m = range.len();
392        let leaf = self.tree.leafs[self.tree.rank[range.start]];
393        let node = self.tree.node_covering_depth(leaf, m);
394        let offset = m - self.tree.nodes[self.tree.nodes[node].parent].depth - 1;
395        self.prefix_distinct[node - 1] + offset as u64
396    }
397
398    pub fn index_of_substring(&self, range: Range<usize>) -> u64 {
399        debug_assert!(range.start < range.end && range.end <= self.tree.text.len());
400        let m = range.len();
401        let idx = self.tree.rank[range.start];
402        let leaf = self.tree.leafs[idx];
403        let node = self.tree.node_covering_depth(leaf, m);
404        let offset = m - self.tree.nodes[self.tree.nodes[node].parent].depth - 1;
405        let occ = self.tree.nodes[node].sa_range.len() as u64;
406        let occ_idx = (idx - self.tree.nodes[node].sa_range.start) as u64;
407        self.prefix_total[node - 1] + offset as u64 * occ + occ_idx
408    }
409}
410
411pub struct MultipleSuffixTree<T>
412where
413    T: Ord + Clone,
414{
415    texts: Vec<Vec<T>>,
416    offsets: Vec<usize>,
417    tree: SuffixTree<Delimited<T>>,
418    position_map: Vec<(usize, usize)>,
419}
420
421impl<T> MultipleSuffixTree<T>
422where
423    T: Ord + Clone,
424{
425    pub fn new(texts: Vec<Vec<T>>) -> Self {
426        assert!(!texts.is_empty());
427        let total_len: usize = texts.iter().map(|text| text.len() + 1).sum();
428        let mut concat = Vec::with_capacity(total_len - 1);
429        let mut offsets = Vec::with_capacity(texts.len());
430        let mut position_map = Vec::with_capacity(total_len);
431        for (i, text) in texts.iter().enumerate() {
432            offsets.push(concat.len());
433            for (pos, value) in text.iter().cloned().enumerate() {
434                concat.push(Delimited::Value(value));
435                position_map.push((i, pos));
436            }
437            if i + 1 < texts.len() {
438                concat.push(Delimited::Separator(!i));
439            }
440            position_map.push((i, text.len()));
441        }
442
443        let mut tree = SuffixTree::new(concat);
444        for node_id in 0..tree.node_size() {
445            if tree.children(node_id).is_empty() {
446                let node = tree.node(node_id);
447                let (text_idx, pos) = position_map[tree.suffix_array()[node.sa_range.start]];
448                tree.nodes[node_id].depth = texts[text_idx].len() - pos;
449            }
450        }
451
452        Self {
453            texts,
454            offsets,
455            tree,
456            position_map,
457        }
458    }
459
460    pub fn texts(&self) -> &[Vec<T>] {
461        &self.texts
462    }
463
464    pub fn suffix_array(&self) -> &[usize] {
465        self.tree.suffix_array()
466    }
467
468    pub fn lcp_array(&self) -> &[usize] {
469        self.tree.lcp_array()
470    }
471
472    pub fn rank(&self) -> &[usize] {
473        self.tree.rank()
474    }
475
476    pub fn node_size(&self) -> usize {
477        self.tree.node_size()
478    }
479
480    pub fn node(&self, node_id: usize) -> &STNode {
481        self.tree.node(node_id)
482    }
483
484    pub fn children(&self, node_id: usize) -> &[usize] {
485        self.tree.children(node_id)
486    }
487
488    pub fn position_map(&self) -> &[(usize, usize)] {
489        &self.position_map
490    }
491
492    pub fn depth_range(&self, node_id: usize) -> Range<usize> {
493        let node = self.tree.node(node_id);
494        if node.parent == !0 {
495            return 0..0;
496        }
497        let parent_depth = self.tree.node(node.parent).depth;
498        parent_depth + 1..node.depth + 1
499    }
500
501    pub fn kth_substrings(&self) -> MultipleKthSubstrings<'_, T> {
502        MultipleKthSubstrings::new(self)
503    }
504
505    fn to_global_start(&self, text_idx: usize, pos: usize) -> usize {
506        self.offsets[text_idx] + pos
507    }
508}
509
510pub struct MultipleKthSubstrings<'a, T>
511where
512    T: Ord + Clone,
513{
514    tree: &'a MultipleSuffixTree<T>,
515    prefix_distinct: Vec<u64>,
516    prefix_total: Vec<u64>,
517}
518
519impl<'a, T> MultipleKthSubstrings<'a, T>
520where
521    T: Ord + Clone,
522{
523    fn new(tree: &'a MultipleSuffixTree<T>) -> Self {
524        let n = tree.tree.nodes.len();
525        let mut prefix_distinct = vec![0u64; n];
526        let mut prefix_total = vec![0u64; n];
527        for i in 1..n {
528            let node = &tree.tree.nodes[i];
529            let parent_depth = tree.tree.nodes[node.parent].depth;
530            let distinct = (node.depth - parent_depth) as u64;
531            let total = distinct * node.sa_range.len() as u64;
532            prefix_distinct[i] = prefix_distinct[i - 1] + distinct;
533            prefix_total[i] = prefix_total[i - 1] + total;
534        }
535        Self {
536            tree,
537            prefix_distinct,
538            prefix_total,
539        }
540    }
541
542    pub fn kth_distinct_substring(&self, k: u64) -> Option<(usize, Range<usize>)> {
543        let idx = self.prefix_distinct.partition_point(|&x| x <= k);
544        if idx == self.prefix_distinct.len() {
545            return None;
546        }
547        let node = &self.tree.tree.nodes[idx];
548        let parent_depth = self.tree.tree.nodes[node.parent].depth;
549        let offset = k - self.prefix_distinct[idx - 1];
550        let len = parent_depth + 1 + offset as usize;
551        let start = self.tree.tree.suffix_array[node.sa_range.start];
552        let (text_idx, pos) = self.tree.position_map[start];
553        Some((text_idx, pos..pos + len))
554    }
555
556    pub fn kth_substring(&self, k: u64) -> Option<(usize, Range<usize>)> {
557        let idx = self.prefix_total.partition_point(|&x| x <= k);
558        if idx == self.prefix_total.len() {
559            return None;
560        }
561        let node = &self.tree.tree.nodes[idx];
562        let parent_depth = self.tree.tree.nodes[node.parent].depth;
563        let offset = k - self.prefix_total[idx - 1];
564        let occ = node.sa_range.len() as u64;
565        let len_offset = (offset / occ) as usize;
566        let occ_idx = (offset % occ) as usize;
567        let len = parent_depth + 1 + len_offset;
568        let start = self.tree.tree.suffix_array[node.sa_range.start + occ_idx];
569        let (text_idx, pos) = self.tree.position_map[start];
570        Some((text_idx, pos..pos + len))
571    }
572
573    pub fn index_of_distinct_substring(&self, (text_idx, range): (usize, Range<usize>)) -> u64 {
574        debug_assert!(text_idx < self.tree.texts.len());
575        debug_assert!(range.start < range.end && range.end <= self.tree.texts[text_idx].len());
576        let m = range.len();
577        let start = self.tree.to_global_start(text_idx, range.start);
578        let sa_idx = self.tree.tree.rank[start];
579        let leaf = self.tree.tree.leafs[sa_idx];
580        let node = self.tree.tree.node_covering_depth(leaf, m);
581        let parent_depth = self.tree.tree.nodes[self.tree.tree.nodes[node].parent].depth;
582        let offset = m - (parent_depth + 1);
583        self.prefix_distinct[node - 1] + offset as u64
584    }
585
586    pub fn index_of_substring(&self, (text_idx, range): (usize, Range<usize>)) -> u64 {
587        debug_assert!(text_idx < self.tree.texts.len());
588        debug_assert!(range.start < range.end && range.end <= self.tree.texts[text_idx].len());
589        let m = range.len();
590        let start = self.tree.to_global_start(text_idx, range.start);
591        let sa_idx = self.tree.tree.rank[start];
592        let leaf = self.tree.tree.leafs[sa_idx];
593        let node = self.tree.tree.node_covering_depth(leaf, m);
594        let parent_depth = self.tree.tree.nodes[self.tree.tree.nodes[node].parent].depth;
595        let offset = m - (parent_depth + 1);
596        let occ = self.tree.tree.nodes[node].sa_range.len() as u64;
597        let occ_idx = (sa_idx - self.tree.tree.nodes[node].sa_range.start) as u64;
598        self.prefix_total[node - 1] + offset as u64 * occ + occ_idx
599    }
600}
601
602#[cfg(test)]
603mod tests {
604    use super::*;
605    use crate::tools::Xorshift;
606    use std::collections::{BTreeMap, BTreeSet};
607
608    #[test]
609    fn test_suffix_tree_substrings() {
610        let mut rng = Xorshift::default();
611        for _ in 0..500 {
612            let n = rng.random(1usize..=100);
613            let csize = rng.random(1usize..=10);
614            let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
615            let st = SuffixTree::new(s.clone());
616            let mut substrings = vec![];
617            assert_eq!(st.node(0).parent, !0);
618            for node_id in 0..st.node_size() {
619                let node = st.node(node_id);
620                if node.parent != !0 {
621                    let parent_depth = st.node(node.parent).depth;
622                    assert!(parent_depth < node.depth);
623                }
624                for depth in st.depth_range(node_id) {
625                    for sa_idx in node.sa_range.clone() {
626                        let start = st.suffix_array[sa_idx];
627                        substrings.push(start..start + depth);
628                    }
629                }
630                for &child_id in st.children(node_id) {
631                    assert_eq!(st.node(child_id).parent, node_id);
632                }
633            }
634            assert!(substrings.iter().map(|r| &s[r.clone()]).is_sorted());
635            let mut expected = vec![];
636            for i in 0..n {
637                for j in i + 1..=n {
638                    expected.push(i..j);
639                }
640            }
641            expected.sort_by_key(|r| (&s[r.clone()], r.start, r.end));
642            substrings.sort_by_key(|r| (&s[r.clone()], r.start, r.end));
643            assert_eq!(substrings, expected);
644        }
645    }
646
647    #[test]
648    fn test_suffix_tree_distinct_substrings() {
649        let mut rng = Xorshift::default();
650        for _ in 0..500 {
651            let n = rng.random(1usize..=100);
652            let csize = rng.random(1usize..=10);
653            let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
654            let st = SuffixTree::new(s.clone());
655            let mut substrings = vec![];
656            assert_eq!(st.node(0).parent, !0);
657            for node_id in 0..st.node_size() {
658                let node = st.node(node_id);
659                if node.parent != !0 {
660                    let parent_depth = st.node(node.parent).depth;
661                    assert!(parent_depth < node.depth);
662                }
663                for depth in st.depth_range(node_id) {
664                    let sa_idx = node.sa_range.start;
665                    let start = st.suffix_array[sa_idx];
666                    substrings.push(start..start + depth);
667                }
668                for &child_id in st.children(node_id) {
669                    assert_eq!(st.node(child_id).parent, node_id);
670                }
671            }
672            assert!(substrings.iter().map(|r| &s[r.clone()]).is_sorted());
673            let mut expected = BTreeSet::new();
674            for i in 0..n {
675                for j in i + 1..=n {
676                    expected.insert(&s[i..j]);
677                }
678            }
679            assert_eq!(substrings.len(), expected.len());
680            for (a, &b) in substrings.iter().zip(expected.iter()) {
681                assert_eq!(&s[a.clone()], b);
682            }
683        }
684    }
685
686    #[test]
687    fn test_suffix_tree_kth_distinct_substring() {
688        let mut rng = Xorshift::default();
689        for _ in 0..200 {
690            let n = rng.random(0usize..=60);
691            let csize = rng.random(1usize..=10);
692            let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
693            let st = SuffixTree::new(s.clone());
694            let kth = st.kth_substrings();
695
696            let mut set = BTreeSet::new();
697            for i in 0..n {
698                for j in i + 1..=n {
699                    set.insert(s[i..j].to_vec());
700                }
701            }
702            let substrings: Vec<_> = set.into_iter().collect();
703            for (k, expected) in substrings.iter().enumerate() {
704                let range = kth.kth_distinct_substring(k as u64).unwrap();
705                assert_eq!(&s[range.clone()], expected.as_slice());
706            }
707            assert_eq!(kth.kth_distinct_substring(substrings.len() as u64), None);
708            let mut index_map = BTreeMap::new();
709            for (k, expected) in substrings.iter().enumerate() {
710                index_map.insert(expected.clone(), k as _);
711            }
712            for i in 0..n {
713                for j in i + 1..=n {
714                    let key = s[i..j].to_vec();
715                    let expected = *index_map.get(&key).unwrap();
716                    assert_eq!(kth.index_of_distinct_substring(i..j), expected);
717                }
718            }
719        }
720    }
721
722    #[test]
723    fn test_suffix_tree_kth_substring() {
724        let mut rng = Xorshift::default();
725        for _ in 0..200 {
726            let n = rng.random(0usize..=60);
727            let csize = rng.random(1usize..=10);
728            let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
729            let st = SuffixTree::new(s.clone());
730            let kth = st.kth_substrings();
731
732            let mut substrings = Vec::new();
733            for node_id in 1..st.node_size() {
734                let node = st.node(node_id);
735                for depth in st.depth_range(node_id) {
736                    for sa_idx in node.sa_range.clone() {
737                        let start = st.suffix_array[sa_idx];
738                        substrings.push(start..start + depth);
739                    }
740                }
741            }
742
743            for (k, range) in substrings.iter().enumerate() {
744                let got = kth.kth_substring(k as u64).unwrap();
745                assert_eq!(got, range.clone());
746                assert_eq!(kth.index_of_substring(range.clone()), k as _);
747            }
748            assert_eq!(kth.kth_substring(substrings.len() as u64), None);
749        }
750    }
751
752    #[test]
753    fn test_multiple_suffix_tree_substrings() {
754        let mut rng = Xorshift::default();
755        for _ in 0..200 {
756            let k = rng.random(1usize..=6);
757            let csize = rng.random(1usize..=10);
758            let mut texts = Vec::with_capacity(k);
759            for _ in 0..k {
760                let n = rng.random(0usize..=40);
761                let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
762                texts.push(s);
763            }
764            let st = MultipleSuffixTree::new(texts.clone());
765
766            let mut substrings = vec![];
767            assert_eq!(st.node(0).parent, !0);
768            for node_id in 0..st.node_size() {
769                let node = st.node(node_id);
770                if node.parent != !0 {
771                    let parent_depth = st.node(node.parent).depth;
772                    assert!(parent_depth <= node.depth);
773                }
774                for depth in st.depth_range(node_id) {
775                    for sa_idx in node.sa_range.clone() {
776                        let start = st.suffix_array()[sa_idx];
777                        let (text_idx, pos) = st.position_map()[start];
778                        assert!(pos + depth <= texts[text_idx].len());
779                        substrings.push((text_idx, pos..pos + depth));
780                    }
781                }
782                for &child_id in st.children(node_id) {
783                    assert_eq!(st.node(child_id).parent, node_id);
784                }
785            }
786            assert!(
787                substrings
788                    .iter()
789                    .map(|(i, r)| &texts[*i][r.clone()])
790                    .is_sorted()
791            );
792            let mut expected = vec![];
793            for (i, text) in texts.iter().enumerate() {
794                for l in 0..text.len() {
795                    for r in l + 1..=text.len() {
796                        expected.push((i, l..r));
797                    }
798                }
799            }
800            expected.sort_unstable_by_key(|(i, r)| (&texts[*i][r.clone()], *i, r.start, r.end));
801            substrings.sort_unstable_by_key(|(i, r)| (&texts[*i][r.clone()], *i, r.start, r.end));
802            assert_eq!(substrings, expected);
803        }
804    }
805
806    #[test]
807    fn test_multiple_suffix_tree_distinct_substrings() {
808        let mut rng = Xorshift::default();
809        for _ in 0..200 {
810            let k = rng.random(1usize..=6);
811            let csize = rng.random(1usize..=10);
812            let mut texts = Vec::with_capacity(k);
813            for _ in 0..k {
814                let n = rng.random(0usize..=40);
815                let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
816                texts.push(s);
817            }
818            let st = MultipleSuffixTree::new(texts.clone());
819
820            let mut substrings = vec![];
821            assert_eq!(st.node(0).parent, !0);
822            for node_id in 0..st.node_size() {
823                let node = st.node(node_id);
824                if node.parent != !0 {
825                    let parent_depth = st.node(node.parent).depth;
826                    assert!(parent_depth <= node.depth);
827                }
828                for depth in st.depth_range(node_id) {
829                    let sa_idx = node.sa_range.start;
830                    let start = st.suffix_array()[sa_idx];
831                    let (text_idx, pos) = st.position_map()[start];
832                    assert!(pos + depth <= texts[text_idx].len());
833                    substrings.push((text_idx, pos..pos + depth));
834                }
835                for &child_id in st.children(node_id) {
836                    assert_eq!(st.node(child_id).parent, node_id);
837                }
838            }
839            assert!(
840                substrings
841                    .iter()
842                    .map(|(i, r)| &texts[*i][r.clone()])
843                    .is_sorted()
844            );
845            let mut expected = BTreeSet::new();
846            for text in &texts {
847                for i in 0..text.len() {
848                    for j in i + 1..=text.len() {
849                        expected.insert(text[i..j].to_vec());
850                    }
851                }
852            }
853            assert_eq!(substrings.len(), expected.len());
854            for (a, b) in substrings.iter().zip(expected.iter()) {
855                assert_eq!(&texts[a.0][a.1.clone()], b.as_slice());
856            }
857        }
858    }
859
860    #[test]
861    fn test_multiple_suffix_tree_kth_distinct_substring() {
862        let mut rng = Xorshift::default();
863        for _ in 0..200 {
864            let k = rng.random(1usize..=5);
865            let csize = rng.random(1usize..=10);
866            let mut texts = Vec::with_capacity(k);
867            for _ in 0..k {
868                let n = rng.random(0usize..=30);
869                let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
870                texts.push(s);
871            }
872            let st = MultipleSuffixTree::new(texts.clone());
873            let kth = st.kth_substrings();
874
875            let mut set = BTreeSet::new();
876            for text in &texts {
877                for i in 0..text.len() {
878                    for j in i + 1..=text.len() {
879                        set.insert(text[i..j].to_vec());
880                    }
881                }
882            }
883            let substrings: Vec<_> = set.into_iter().collect();
884            for (idx, expected) in substrings.iter().enumerate() {
885                let (text_idx, range) = kth.kth_distinct_substring(idx as u64).unwrap();
886                assert_eq!(&texts[text_idx][range.clone()], expected.as_slice());
887            }
888            assert_eq!(kth.kth_distinct_substring(substrings.len() as u64), None);
889            let mut index_map = BTreeMap::new();
890            for (idx, expected) in substrings.iter().enumerate() {
891                index_map.insert(expected.clone(), idx as _);
892            }
893            for (text_idx, text) in texts.iter().enumerate() {
894                for i in 0..text.len() {
895                    for j in i + 1..=text.len() {
896                        let key = text[i..j].to_vec();
897                        let expected = *index_map.get(&key).unwrap();
898                        assert_eq!(kth.index_of_distinct_substring((text_idx, i..j)), expected);
899                    }
900                }
901            }
902        }
903    }
904
905    #[test]
906    fn test_multiple_suffix_tree_kth_substring() {
907        let mut rng = Xorshift::default();
908        for _ in 0..200 {
909            let k = rng.random(1usize..=5);
910            let csize = rng.random(1usize..=10);
911            let mut texts = Vec::with_capacity(k);
912            for _ in 0..k {
913                let n = rng.random(0usize..=30);
914                let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
915                texts.push(s);
916            }
917            let st = MultipleSuffixTree::new(texts.clone());
918            let kth = st.kth_substrings();
919
920            let mut substrings = Vec::new();
921            for node_id in 1..st.node_size() {
922                let node = st.node(node_id);
923                for depth in st.depth_range(node_id) {
924                    for sa_idx in node.sa_range.clone() {
925                        let start = st.suffix_array()[sa_idx];
926                        let (text_idx, pos) = st.position_map()[start];
927                        substrings.push((text_idx, pos..pos + depth));
928                    }
929                }
930            }
931
932            for (idx, (text_idx, range)) in substrings.iter().enumerate() {
933                let got = kth.kth_substring(idx as u64).unwrap();
934                assert_eq!(got, (*text_idx, range.clone()));
935                assert_eq!(kth.index_of_substring((*text_idx, range.clone())), idx as _);
936            }
937            assert_eq!(kth.kth_substring(substrings.len() as u64), None);
938        }
939    }
940}