1use super::SuffixArray;
2use std::ops::Range;
3
4#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
5pub enum Delimited<T> {
6 Separator(usize),
7 Value(T),
8}
9
10#[derive(Debug)]
11pub struct STNode {
12 pub parent: usize,
13 pub depth: usize,
14 pub sa_range: Range<usize>,
15 pub child_index: Range<usize>,
16 pub head: usize,
17 pub pos: usize,
18}
19
20pub struct SuffixTree<T>
21where
22 T: Ord,
23{
24 text: Vec<T>,
25 suffix_array: Vec<usize>,
26 lcp_array: Vec<usize>,
27 rank: Vec<usize>,
28 nodes: Vec<STNode>,
29 children: Vec<usize>,
30 leafs: Vec<usize>,
31 inv: Vec<usize>,
32}
33
34struct Link {
35 first_child: usize,
36 last_child: usize,
37 next_sibling: usize,
38 prev_sibling: usize,
39}
40
41struct SuffixTreeBuilder {
42 nodes: Vec<STNode>,
43 links: Vec<Link>,
44}
45
46impl SuffixTreeBuilder {
47 fn with_capacity(capacity: usize) -> Self {
48 Self {
49 nodes: Vec::with_capacity(capacity),
50 links: Vec::with_capacity(capacity),
51 }
52 }
53
54 fn push_node(&mut self, depth: usize, start: usize) -> usize {
55 let node_id = self.nodes.len();
56 self.nodes.push(STNode {
57 parent: !0,
58 depth,
59 sa_range: start..!0,
60 child_index: 0..0,
61 head: !0,
62 pos: !0,
63 });
64 self.links.push(Link {
65 first_child: !0,
66 last_child: !0,
67 next_sibling: !0,
68 prev_sibling: !0,
69 });
70 node_id
71 }
72
73 fn attach_child(&mut self, parent: usize, child: usize) {
74 self.nodes[child].parent = parent;
75 let tail = self.links[parent].last_child;
76 if tail == !0 {
77 self.links[parent].first_child = child;
78 } else {
79 self.links[tail].next_sibling = child;
80 self.links[child].prev_sibling = tail;
81 }
82 self.links[parent].last_child = child;
83 }
84
85 fn detach_last_child(&mut self, parent: usize, child: usize) {
86 let prev = self.links[child].prev_sibling;
87 if prev == !0 {
88 self.links[parent].first_child = !0;
89 } else {
90 self.links[prev].next_sibling = !0;
91 }
92 self.links[parent].last_child = prev;
93 self.links[child].prev_sibling = !0;
94 self.links[child].next_sibling = !0;
95 }
96
97 fn reindex(&mut self, leafs: &mut [usize]) {
98 let mut new_index = vec![!0; self.nodes.len()];
99 let mut stack = vec![0];
100 let mut index = 0;
101 while let Some(node_id) = stack.pop() {
102 new_index[node_id] = index;
103 index += 1;
104 let mut child = self.links[node_id].last_child;
105 while child != !0 {
106 stack.push(child);
107 child = self.links[child].prev_sibling;
108 }
109 }
110
111 for node in &mut self.nodes {
112 if node.parent != !0 {
113 node.parent = new_index[node.parent];
114 }
115 }
116 for link in &mut self.links {
117 if link.first_child != !0 {
118 link.first_child = new_index[link.first_child];
119 }
120 if link.last_child != !0 {
121 link.last_child = new_index[link.last_child];
122 }
123 if link.next_sibling != !0 {
124 link.next_sibling = new_index[link.next_sibling];
125 }
126 if link.prev_sibling != !0 {
127 link.prev_sibling = new_index[link.prev_sibling];
128 }
129 }
130 for leaf in leafs {
131 *leaf = new_index[*leaf];
132 }
133
134 for i in 0..self.nodes.len() {
135 while new_index[i] != i {
136 let j = new_index[i];
137 self.nodes.swap(i, j);
138 self.links.swap(i, j);
139 new_index.swap(i, j);
140 }
141 }
142 }
143
144 fn build_aux(&mut self, children: &[usize]) -> Vec<usize> {
145 let n = self.nodes.len();
146 let mut sub_size = vec![1usize; n];
147 let mut heavy = vec![!0usize; n];
148 for node in (0..n).rev() {
149 let mut max_size = 0usize;
150 for &child in &children[self.nodes[node].child_index.clone()] {
151 sub_size[node] += sub_size[child];
152 if sub_size[child] > max_size {
153 max_size = sub_size[child];
154 heavy[node] = child;
155 }
156 }
157 }
158
159 let mut inv = vec![0usize; n];
160 let mut cur = 0usize;
161 let mut stack = vec![(0usize, 0usize)];
162 while let Some((v, h)) = stack.pop() {
163 let mut u = v;
164 while u != !0 {
165 self.nodes[u].head = h;
166 self.nodes[u].pos = cur;
167 inv[cur] = u;
168 cur += 1;
169 for &child in &children[self.nodes[u].child_index.clone()] {
170 if child != heavy[u] {
171 stack.push((child, child));
172 }
173 }
174 u = heavy[u];
175 }
176 }
177 inv
178 }
179
180 fn build_children(&mut self) -> Vec<usize> {
181 let mut children = vec![!0; self.nodes.len() - 1];
182 let mut index = 0;
183 for node_id in 0..self.nodes.len() {
184 let mut child = self.links[node_id].first_child;
185 self.nodes[node_id].child_index.start = index;
186 while child != !0 {
187 children[index] = child;
188 index += 1;
189 child = self.links[child].next_sibling;
190 }
191 self.nodes[node_id].child_index.end = index;
192 }
193 children
194 }
195}
196
197impl<T> SuffixTree<T>
198where
199 T: Ord,
200{
201 pub fn new(text: Vec<T>) -> Self {
202 let n = text.len();
203 let suffix_array = SuffixArray::new(&text);
204 let (lcp_array, rank) = suffix_array.lcp_array_with_rank(&text);
205 let mut builder = SuffixTreeBuilder::with_capacity(n * 2);
206 builder.push_node(0, 0);
207 let mut leafs = vec![!0; n + 1];
208 leafs[0] = 0;
209 let mut stack = Vec::with_capacity(n);
210 stack.push(0);
211 for i in 1..=n {
212 let lcp = lcp_array[i - 1];
213 let mut last_popped = !0;
214 while builder.nodes[stack.last().cloned().unwrap()].depth > lcp {
215 last_popped = stack.pop().unwrap();
216 builder.nodes[last_popped].sa_range.end = i;
217 }
218
219 if builder.nodes[stack.last().cloned().unwrap()].depth < lcp {
221 let parent = stack.last().cloned().unwrap();
222 let internal = builder.nodes.len();
223 builder.push_node(lcp, builder.nodes[last_popped].sa_range.start);
224 builder.detach_last_child(parent, last_popped);
225 builder.attach_child(parent, internal);
226 builder.attach_child(internal, last_popped);
227 stack.push(internal);
228 }
229
230 let parent = stack.last().cloned().unwrap();
232 let leaf = builder.nodes.len();
233 builder.push_node(n - suffix_array[i], i);
234 builder.attach_child(parent, leaf);
235 stack.push(leaf);
236 leafs[i] = leaf;
237 }
238 while let Some(node) = stack.pop() {
239 builder.nodes[node].sa_range.end = n + 1;
240 }
241
242 builder.reindex(&mut leafs[1..]);
243 let children = builder.build_children();
244 let inv = builder.build_aux(&children);
245 let nodes = builder.nodes;
246 let suffix_array = suffix_array.into_inner();
247
248 Self {
249 text,
250 suffix_array,
251 lcp_array,
252 rank,
253 nodes,
254 children,
255 leafs,
256 inv,
257 }
258 }
259
260 pub fn text(&self) -> &[T] {
261 &self.text
262 }
263
264 pub fn suffix_array(&self) -> &[usize] {
265 &self.suffix_array
266 }
267
268 pub fn lcp_array(&self) -> &[usize] {
269 &self.lcp_array
270 }
271
272 pub fn rank(&self) -> &[usize] {
273 &self.rank
274 }
275
276 pub fn node_size(&self) -> usize {
277 self.nodes.len()
278 }
279
280 pub fn node(&self, node_id: usize) -> &STNode {
281 &self.nodes[node_id]
282 }
283
284 pub fn children(&self, node_id: usize) -> &[usize] {
285 &self.children[self.nodes[node_id].child_index.clone()]
286 }
287
288 pub fn depth_range(&self, node_id: usize) -> Range<usize> {
289 let node = &self.nodes[node_id];
290 if node.parent == !0 {
291 0..0
292 } else {
293 self.nodes[node.parent].depth + 1..node.depth + 1
294 }
295 }
296
297 pub fn kth_substrings(&self) -> KthSubstrings<'_, T> {
298 KthSubstrings::new(self)
299 }
300
301 fn node_covering_depth(&self, leaf: usize, depth: usize) -> usize {
302 debug_assert!(depth > 0);
303 debug_assert!(depth <= self.nodes[leaf].depth);
304 let mut v = leaf;
305 loop {
306 let h = self.nodes[v].head;
307 let parent = self.nodes[h].parent;
308 let parent_depth = if parent == !0 {
309 0
310 } else {
311 self.nodes[parent].depth
312 };
313 if parent_depth < depth {
314 let mut l = self.nodes[h].pos;
315 let mut r = self.nodes[v].pos;
316 while l < r {
317 let m = (l + r) >> 1;
318 let node_id = self.inv[m];
319 if self.nodes[node_id].depth < depth {
320 l = m + 1;
321 } else {
322 r = m;
323 }
324 }
325 return self.inv[l];
326 }
327 v = parent;
328 }
329 }
330}
331
332pub struct KthSubstrings<'a, T>
333where
334 T: Ord,
335{
336 tree: &'a SuffixTree<T>,
337 prefix_distinct: Vec<u64>,
338 prefix_total: Vec<u64>,
339}
340
341impl<'a, T> KthSubstrings<'a, T>
342where
343 T: Ord,
344{
345 fn new(tree: &'a SuffixTree<T>) -> Self {
346 let n = tree.nodes.len();
347 let mut prefix_distinct = vec![0u64; n];
348 let mut prefix_total = vec![0u64; n];
349 for i in 1..n {
350 let node = &tree.nodes[i];
351 let parent_depth = tree.nodes[node.parent].depth;
352 let distinct = (node.depth - parent_depth) as u64;
353 let total = distinct * node.sa_range.len() as u64;
354 prefix_distinct[i] = prefix_distinct[i - 1] + distinct;
355 prefix_total[i] = prefix_total[i - 1] + total;
356 }
357 Self {
358 tree,
359 prefix_distinct,
360 prefix_total,
361 }
362 }
363
364 pub fn kth_distinct_substring(&self, k: u64) -> Option<Range<usize>> {
365 let idx = self.prefix_distinct.partition_point(|&x| x <= k);
366 if idx == self.prefix_distinct.len() {
367 return None;
368 }
369 let node = &self.tree.nodes[idx];
370 let offset = k - self.prefix_distinct[idx - 1];
371 let len = self.tree.nodes[node.parent].depth + 1 + offset as usize;
372 let start = self.tree.suffix_array[node.sa_range.start];
373 Some(start..start + len)
374 }
375
376 pub fn kth_substring(&self, k: u64) -> Option<Range<usize>> {
377 let idx = self.prefix_total.partition_point(|&x| x <= k);
378 if idx == self.prefix_total.len() {
379 return None;
380 }
381 let node = &self.tree.nodes[idx];
382 let offset = k - self.prefix_total[idx - 1];
383 let occ = node.sa_range.len() as u64;
384 let len = self.tree.nodes[node.parent].depth + 1 + (offset / occ) as usize;
385 let start = self.tree.suffix_array[node.sa_range.start + (offset % occ) as usize];
386 Some(start..start + len)
387 }
388
389 pub fn index_of_distinct_substring(&self, range: Range<usize>) -> u64 {
390 debug_assert!(range.start < range.end && range.end <= self.tree.text.len());
391 let m = range.len();
392 let leaf = self.tree.leafs[self.tree.rank[range.start]];
393 let node = self.tree.node_covering_depth(leaf, m);
394 let offset = m - self.tree.nodes[self.tree.nodes[node].parent].depth - 1;
395 self.prefix_distinct[node - 1] + offset as u64
396 }
397
398 pub fn index_of_substring(&self, range: Range<usize>) -> u64 {
399 debug_assert!(range.start < range.end && range.end <= self.tree.text.len());
400 let m = range.len();
401 let idx = self.tree.rank[range.start];
402 let leaf = self.tree.leafs[idx];
403 let node = self.tree.node_covering_depth(leaf, m);
404 let offset = m - self.tree.nodes[self.tree.nodes[node].parent].depth - 1;
405 let occ = self.tree.nodes[node].sa_range.len() as u64;
406 let occ_idx = (idx - self.tree.nodes[node].sa_range.start) as u64;
407 self.prefix_total[node - 1] + offset as u64 * occ + occ_idx
408 }
409}
410
411pub struct MultipleSuffixTree<T>
412where
413 T: Ord + Clone,
414{
415 texts: Vec<Vec<T>>,
416 offsets: Vec<usize>,
417 tree: SuffixTree<Delimited<T>>,
418 position_map: Vec<(usize, usize)>,
419}
420
421impl<T> MultipleSuffixTree<T>
422where
423 T: Ord + Clone,
424{
425 pub fn new(texts: Vec<Vec<T>>) -> Self {
426 assert!(!texts.is_empty());
427 let total_len: usize = texts.iter().map(|text| text.len() + 1).sum();
428 let mut concat = Vec::with_capacity(total_len - 1);
429 let mut offsets = Vec::with_capacity(texts.len());
430 let mut position_map = Vec::with_capacity(total_len);
431 for (i, text) in texts.iter().enumerate() {
432 offsets.push(concat.len());
433 for (pos, value) in text.iter().cloned().enumerate() {
434 concat.push(Delimited::Value(value));
435 position_map.push((i, pos));
436 }
437 if i + 1 < texts.len() {
438 concat.push(Delimited::Separator(!i));
439 }
440 position_map.push((i, text.len()));
441 }
442
443 let mut tree = SuffixTree::new(concat);
444 for node_id in 0..tree.node_size() {
445 if tree.children(node_id).is_empty() {
446 let node = tree.node(node_id);
447 let (text_idx, pos) = position_map[tree.suffix_array()[node.sa_range.start]];
448 tree.nodes[node_id].depth = texts[text_idx].len() - pos;
449 }
450 }
451
452 Self {
453 texts,
454 offsets,
455 tree,
456 position_map,
457 }
458 }
459
460 pub fn texts(&self) -> &[Vec<T>] {
461 &self.texts
462 }
463
464 pub fn suffix_array(&self) -> &[usize] {
465 self.tree.suffix_array()
466 }
467
468 pub fn lcp_array(&self) -> &[usize] {
469 self.tree.lcp_array()
470 }
471
472 pub fn rank(&self) -> &[usize] {
473 self.tree.rank()
474 }
475
476 pub fn node_size(&self) -> usize {
477 self.tree.node_size()
478 }
479
480 pub fn node(&self, node_id: usize) -> &STNode {
481 self.tree.node(node_id)
482 }
483
484 pub fn children(&self, node_id: usize) -> &[usize] {
485 self.tree.children(node_id)
486 }
487
488 pub fn position_map(&self) -> &[(usize, usize)] {
489 &self.position_map
490 }
491
492 pub fn depth_range(&self, node_id: usize) -> Range<usize> {
493 let node = self.tree.node(node_id);
494 if node.parent == !0 {
495 return 0..0;
496 }
497 let parent_depth = self.tree.node(node.parent).depth;
498 parent_depth + 1..node.depth + 1
499 }
500
501 pub fn kth_substrings(&self) -> MultipleKthSubstrings<'_, T> {
502 MultipleKthSubstrings::new(self)
503 }
504
505 fn to_global_start(&self, text_idx: usize, pos: usize) -> usize {
506 self.offsets[text_idx] + pos
507 }
508}
509
510pub struct MultipleKthSubstrings<'a, T>
511where
512 T: Ord + Clone,
513{
514 tree: &'a MultipleSuffixTree<T>,
515 prefix_distinct: Vec<u64>,
516 prefix_total: Vec<u64>,
517}
518
519impl<'a, T> MultipleKthSubstrings<'a, T>
520where
521 T: Ord + Clone,
522{
523 fn new(tree: &'a MultipleSuffixTree<T>) -> Self {
524 let n = tree.tree.nodes.len();
525 let mut prefix_distinct = vec![0u64; n];
526 let mut prefix_total = vec![0u64; n];
527 for i in 1..n {
528 let node = &tree.tree.nodes[i];
529 let parent_depth = tree.tree.nodes[node.parent].depth;
530 let distinct = (node.depth - parent_depth) as u64;
531 let total = distinct * node.sa_range.len() as u64;
532 prefix_distinct[i] = prefix_distinct[i - 1] + distinct;
533 prefix_total[i] = prefix_total[i - 1] + total;
534 }
535 Self {
536 tree,
537 prefix_distinct,
538 prefix_total,
539 }
540 }
541
542 pub fn kth_distinct_substring(&self, k: u64) -> Option<(usize, Range<usize>)> {
543 let idx = self.prefix_distinct.partition_point(|&x| x <= k);
544 if idx == self.prefix_distinct.len() {
545 return None;
546 }
547 let node = &self.tree.tree.nodes[idx];
548 let parent_depth = self.tree.tree.nodes[node.parent].depth;
549 let offset = k - self.prefix_distinct[idx - 1];
550 let len = parent_depth + 1 + offset as usize;
551 let start = self.tree.tree.suffix_array[node.sa_range.start];
552 let (text_idx, pos) = self.tree.position_map[start];
553 Some((text_idx, pos..pos + len))
554 }
555
556 pub fn kth_substring(&self, k: u64) -> Option<(usize, Range<usize>)> {
557 let idx = self.prefix_total.partition_point(|&x| x <= k);
558 if idx == self.prefix_total.len() {
559 return None;
560 }
561 let node = &self.tree.tree.nodes[idx];
562 let parent_depth = self.tree.tree.nodes[node.parent].depth;
563 let offset = k - self.prefix_total[idx - 1];
564 let occ = node.sa_range.len() as u64;
565 let len_offset = (offset / occ) as usize;
566 let occ_idx = (offset % occ) as usize;
567 let len = parent_depth + 1 + len_offset;
568 let start = self.tree.tree.suffix_array[node.sa_range.start + occ_idx];
569 let (text_idx, pos) = self.tree.position_map[start];
570 Some((text_idx, pos..pos + len))
571 }
572
573 pub fn index_of_distinct_substring(&self, (text_idx, range): (usize, Range<usize>)) -> u64 {
574 debug_assert!(text_idx < self.tree.texts.len());
575 debug_assert!(range.start < range.end && range.end <= self.tree.texts[text_idx].len());
576 let m = range.len();
577 let start = self.tree.to_global_start(text_idx, range.start);
578 let sa_idx = self.tree.tree.rank[start];
579 let leaf = self.tree.tree.leafs[sa_idx];
580 let node = self.tree.tree.node_covering_depth(leaf, m);
581 let parent_depth = self.tree.tree.nodes[self.tree.tree.nodes[node].parent].depth;
582 let offset = m - (parent_depth + 1);
583 self.prefix_distinct[node - 1] + offset as u64
584 }
585
586 pub fn index_of_substring(&self, (text_idx, range): (usize, Range<usize>)) -> u64 {
587 debug_assert!(text_idx < self.tree.texts.len());
588 debug_assert!(range.start < range.end && range.end <= self.tree.texts[text_idx].len());
589 let m = range.len();
590 let start = self.tree.to_global_start(text_idx, range.start);
591 let sa_idx = self.tree.tree.rank[start];
592 let leaf = self.tree.tree.leafs[sa_idx];
593 let node = self.tree.tree.node_covering_depth(leaf, m);
594 let parent_depth = self.tree.tree.nodes[self.tree.tree.nodes[node].parent].depth;
595 let offset = m - (parent_depth + 1);
596 let occ = self.tree.tree.nodes[node].sa_range.len() as u64;
597 let occ_idx = (sa_idx - self.tree.tree.nodes[node].sa_range.start) as u64;
598 self.prefix_total[node - 1] + offset as u64 * occ + occ_idx
599 }
600}
601
602#[cfg(test)]
603mod tests {
604 use super::*;
605 use crate::tools::Xorshift;
606 use std::collections::{BTreeMap, BTreeSet};
607
608 #[test]
609 fn test_suffix_tree_substrings() {
610 let mut rng = Xorshift::default();
611 for _ in 0..500 {
612 let n = rng.random(1usize..=100);
613 let csize = rng.random(1usize..=10);
614 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
615 let st = SuffixTree::new(s.clone());
616 let mut substrings = vec![];
617 assert_eq!(st.node(0).parent, !0);
618 for node_id in 0..st.node_size() {
619 let node = st.node(node_id);
620 if node.parent != !0 {
621 let parent_depth = st.node(node.parent).depth;
622 assert!(parent_depth < node.depth);
623 }
624 for depth in st.depth_range(node_id) {
625 for sa_idx in node.sa_range.clone() {
626 let start = st.suffix_array[sa_idx];
627 substrings.push(start..start + depth);
628 }
629 }
630 for &child_id in st.children(node_id) {
631 assert_eq!(st.node(child_id).parent, node_id);
632 }
633 }
634 assert!(substrings.iter().map(|r| &s[r.clone()]).is_sorted());
635 let mut expected = vec![];
636 for i in 0..n {
637 for j in i + 1..=n {
638 expected.push(i..j);
639 }
640 }
641 expected.sort_by_key(|r| (&s[r.clone()], r.start, r.end));
642 substrings.sort_by_key(|r| (&s[r.clone()], r.start, r.end));
643 assert_eq!(substrings, expected);
644 }
645 }
646
647 #[test]
648 fn test_suffix_tree_distinct_substrings() {
649 let mut rng = Xorshift::default();
650 for _ in 0..500 {
651 let n = rng.random(1usize..=100);
652 let csize = rng.random(1usize..=10);
653 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
654 let st = SuffixTree::new(s.clone());
655 let mut substrings = vec![];
656 assert_eq!(st.node(0).parent, !0);
657 for node_id in 0..st.node_size() {
658 let node = st.node(node_id);
659 if node.parent != !0 {
660 let parent_depth = st.node(node.parent).depth;
661 assert!(parent_depth < node.depth);
662 }
663 for depth in st.depth_range(node_id) {
664 let sa_idx = node.sa_range.start;
665 let start = st.suffix_array[sa_idx];
666 substrings.push(start..start + depth);
667 }
668 for &child_id in st.children(node_id) {
669 assert_eq!(st.node(child_id).parent, node_id);
670 }
671 }
672 assert!(substrings.iter().map(|r| &s[r.clone()]).is_sorted());
673 let mut expected = BTreeSet::new();
674 for i in 0..n {
675 for j in i + 1..=n {
676 expected.insert(&s[i..j]);
677 }
678 }
679 assert_eq!(substrings.len(), expected.len());
680 for (a, &b) in substrings.iter().zip(expected.iter()) {
681 assert_eq!(&s[a.clone()], b);
682 }
683 }
684 }
685
686 #[test]
687 fn test_suffix_tree_kth_distinct_substring() {
688 let mut rng = Xorshift::default();
689 for _ in 0..200 {
690 let n = rng.random(0usize..=60);
691 let csize = rng.random(1usize..=10);
692 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
693 let st = SuffixTree::new(s.clone());
694 let kth = st.kth_substrings();
695
696 let mut set = BTreeSet::new();
697 for i in 0..n {
698 for j in i + 1..=n {
699 set.insert(s[i..j].to_vec());
700 }
701 }
702 let substrings: Vec<_> = set.into_iter().collect();
703 for (k, expected) in substrings.iter().enumerate() {
704 let range = kth.kth_distinct_substring(k as u64).unwrap();
705 assert_eq!(&s[range.clone()], expected.as_slice());
706 }
707 assert_eq!(kth.kth_distinct_substring(substrings.len() as u64), None);
708 let mut index_map = BTreeMap::new();
709 for (k, expected) in substrings.iter().enumerate() {
710 index_map.insert(expected.clone(), k as _);
711 }
712 for i in 0..n {
713 for j in i + 1..=n {
714 let key = s[i..j].to_vec();
715 let expected = *index_map.get(&key).unwrap();
716 assert_eq!(kth.index_of_distinct_substring(i..j), expected);
717 }
718 }
719 }
720 }
721
722 #[test]
723 fn test_suffix_tree_kth_substring() {
724 let mut rng = Xorshift::default();
725 for _ in 0..200 {
726 let n = rng.random(0usize..=60);
727 let csize = rng.random(1usize..=10);
728 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
729 let st = SuffixTree::new(s.clone());
730 let kth = st.kth_substrings();
731
732 let mut substrings = Vec::new();
733 for node_id in 1..st.node_size() {
734 let node = st.node(node_id);
735 for depth in st.depth_range(node_id) {
736 for sa_idx in node.sa_range.clone() {
737 let start = st.suffix_array[sa_idx];
738 substrings.push(start..start + depth);
739 }
740 }
741 }
742
743 for (k, range) in substrings.iter().enumerate() {
744 let got = kth.kth_substring(k as u64).unwrap();
745 assert_eq!(got, range.clone());
746 assert_eq!(kth.index_of_substring(range.clone()), k as _);
747 }
748 assert_eq!(kth.kth_substring(substrings.len() as u64), None);
749 }
750 }
751
752 #[test]
753 fn test_multiple_suffix_tree_substrings() {
754 let mut rng = Xorshift::default();
755 for _ in 0..200 {
756 let k = rng.random(1usize..=6);
757 let csize = rng.random(1usize..=10);
758 let mut texts = Vec::with_capacity(k);
759 for _ in 0..k {
760 let n = rng.random(0usize..=40);
761 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
762 texts.push(s);
763 }
764 let st = MultipleSuffixTree::new(texts.clone());
765
766 let mut substrings = vec![];
767 assert_eq!(st.node(0).parent, !0);
768 for node_id in 0..st.node_size() {
769 let node = st.node(node_id);
770 if node.parent != !0 {
771 let parent_depth = st.node(node.parent).depth;
772 assert!(parent_depth <= node.depth);
773 }
774 for depth in st.depth_range(node_id) {
775 for sa_idx in node.sa_range.clone() {
776 let start = st.suffix_array()[sa_idx];
777 let (text_idx, pos) = st.position_map()[start];
778 assert!(pos + depth <= texts[text_idx].len());
779 substrings.push((text_idx, pos..pos + depth));
780 }
781 }
782 for &child_id in st.children(node_id) {
783 assert_eq!(st.node(child_id).parent, node_id);
784 }
785 }
786 assert!(
787 substrings
788 .iter()
789 .map(|(i, r)| &texts[*i][r.clone()])
790 .is_sorted()
791 );
792 let mut expected = vec![];
793 for (i, text) in texts.iter().enumerate() {
794 for l in 0..text.len() {
795 for r in l + 1..=text.len() {
796 expected.push((i, l..r));
797 }
798 }
799 }
800 expected.sort_unstable_by_key(|(i, r)| (&texts[*i][r.clone()], *i, r.start, r.end));
801 substrings.sort_unstable_by_key(|(i, r)| (&texts[*i][r.clone()], *i, r.start, r.end));
802 assert_eq!(substrings, expected);
803 }
804 }
805
806 #[test]
807 fn test_multiple_suffix_tree_distinct_substrings() {
808 let mut rng = Xorshift::default();
809 for _ in 0..200 {
810 let k = rng.random(1usize..=6);
811 let csize = rng.random(1usize..=10);
812 let mut texts = Vec::with_capacity(k);
813 for _ in 0..k {
814 let n = rng.random(0usize..=40);
815 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
816 texts.push(s);
817 }
818 let st = MultipleSuffixTree::new(texts.clone());
819
820 let mut substrings = vec![];
821 assert_eq!(st.node(0).parent, !0);
822 for node_id in 0..st.node_size() {
823 let node = st.node(node_id);
824 if node.parent != !0 {
825 let parent_depth = st.node(node.parent).depth;
826 assert!(parent_depth <= node.depth);
827 }
828 for depth in st.depth_range(node_id) {
829 let sa_idx = node.sa_range.start;
830 let start = st.suffix_array()[sa_idx];
831 let (text_idx, pos) = st.position_map()[start];
832 assert!(pos + depth <= texts[text_idx].len());
833 substrings.push((text_idx, pos..pos + depth));
834 }
835 for &child_id in st.children(node_id) {
836 assert_eq!(st.node(child_id).parent, node_id);
837 }
838 }
839 assert!(
840 substrings
841 .iter()
842 .map(|(i, r)| &texts[*i][r.clone()])
843 .is_sorted()
844 );
845 let mut expected = BTreeSet::new();
846 for text in &texts {
847 for i in 0..text.len() {
848 for j in i + 1..=text.len() {
849 expected.insert(text[i..j].to_vec());
850 }
851 }
852 }
853 assert_eq!(substrings.len(), expected.len());
854 for (a, b) in substrings.iter().zip(expected.iter()) {
855 assert_eq!(&texts[a.0][a.1.clone()], b.as_slice());
856 }
857 }
858 }
859
860 #[test]
861 fn test_multiple_suffix_tree_kth_distinct_substring() {
862 let mut rng = Xorshift::default();
863 for _ in 0..200 {
864 let k = rng.random(1usize..=5);
865 let csize = rng.random(1usize..=10);
866 let mut texts = Vec::with_capacity(k);
867 for _ in 0..k {
868 let n = rng.random(0usize..=30);
869 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
870 texts.push(s);
871 }
872 let st = MultipleSuffixTree::new(texts.clone());
873 let kth = st.kth_substrings();
874
875 let mut set = BTreeSet::new();
876 for text in &texts {
877 for i in 0..text.len() {
878 for j in i + 1..=text.len() {
879 set.insert(text[i..j].to_vec());
880 }
881 }
882 }
883 let substrings: Vec<_> = set.into_iter().collect();
884 for (idx, expected) in substrings.iter().enumerate() {
885 let (text_idx, range) = kth.kth_distinct_substring(idx as u64).unwrap();
886 assert_eq!(&texts[text_idx][range.clone()], expected.as_slice());
887 }
888 assert_eq!(kth.kth_distinct_substring(substrings.len() as u64), None);
889 let mut index_map = BTreeMap::new();
890 for (idx, expected) in substrings.iter().enumerate() {
891 index_map.insert(expected.clone(), idx as _);
892 }
893 for (text_idx, text) in texts.iter().enumerate() {
894 for i in 0..text.len() {
895 for j in i + 1..=text.len() {
896 let key = text[i..j].to_vec();
897 let expected = *index_map.get(&key).unwrap();
898 assert_eq!(kth.index_of_distinct_substring((text_idx, i..j)), expected);
899 }
900 }
901 }
902 }
903 }
904
905 #[test]
906 fn test_multiple_suffix_tree_kth_substring() {
907 let mut rng = Xorshift::default();
908 for _ in 0..200 {
909 let k = rng.random(1usize..=5);
910 let csize = rng.random(1usize..=10);
911 let mut texts = Vec::with_capacity(k);
912 for _ in 0..k {
913 let n = rng.random(0usize..=30);
914 let s: Vec<_> = rng.random_iter(0usize..csize).take(n).collect();
915 texts.push(s);
916 }
917 let st = MultipleSuffixTree::new(texts.clone());
918 let kth = st.kth_substrings();
919
920 let mut substrings = Vec::new();
921 for node_id in 1..st.node_size() {
922 let node = st.node(node_id);
923 for depth in st.depth_range(node_id) {
924 for sa_idx in node.sa_range.clone() {
925 let start = st.suffix_array()[sa_idx];
926 let (text_idx, pos) = st.position_map()[start];
927 substrings.push((text_idx, pos..pos + depth));
928 }
929 }
930 }
931
932 for (idx, (text_idx, range)) in substrings.iter().enumerate() {
933 let got = kth.kth_substring(idx as u64).unwrap();
934 assert_eq!(got, (*text_idx, range.clone()));
935 assert_eq!(kth.index_of_substring((*text_idx, range.clone())), idx as _);
936 }
937 assert_eq!(kth.kth_substring(substrings.len() as u64), None);
938 }
939 }
940}