lattices/
set_union_with_tombstones.rs

1//! Module containing the [`SetUnionWithTombstones`] lattice and aliases for different datastructures.
2//!
3//! See [`crate::tombstone`] for documentation on choosing a tombstone implementation.
4
5use std::cmp::Ordering::{self, *};
6use std::collections::{BTreeSet, HashSet};
7
8use cc_traits::{Collection, Get, Remove};
9
10use crate::cc_traits::{Iter, Len, Set};
11use crate::collections::{ArraySet, EmptySet, OptionSet, SingletonSet};
12use crate::tombstone::{FstTombstoneSet, RoaringTombstoneSet, TombstoneSet};
13use crate::{IsBot, IsTop, LatticeFrom, LatticeOrd, Merge};
14
15/// Set-union lattice with tombstones.
16///
17/// When an item is deleted from the SetUnionWithTombstones, it is removed from `set` and added to `tombstones`.
18/// This also is an invariant, if an item appears in `tombstones` it must not also be in `set`.
19///
20/// Merging set-union lattices is done by unioning the keys of both the (set and tombstone) sets,
21/// and then performing `set` = `set` - `tombstones`, to preserve the above invariant.
22///
23/// This implementation with two separate sets means that the actual set implementation can be decided
24/// for both the regular set and the tombstone set. This enables efficient storage strategies like using
25/// [`crate::tombstone::RoaringTombstoneSet`] for tombstones (see [`SetUnionWithTombstonesRoaring`]), which provides space-efficient
26/// bitmap compression for the tombstone set while keeping the main set flexible.
27#[derive(Default, Clone, Debug)]
28pub struct SetUnionWithTombstones<Set, TombstoneSet> {
29    set: Set,
30    tombstones: TombstoneSet,
31}
32
33impl<Set, TombstoneSet> SetUnionWithTombstones<Set, TombstoneSet> {
34    /// Create a new `SetUnionWithTombstones` from a `Set` and `TombstoneSet`.
35    pub fn new(set: Set, tombstones: TombstoneSet) -> Self {
36        Self { set, tombstones }
37    }
38
39    /// Create a new `SetUnionWithTombstones` from an `Into<Set>` and an `Into<TombstonesSet>`.
40    pub fn new_from(set: impl Into<Set>, tombstones: impl Into<TombstoneSet>) -> Self {
41        Self::new(set.into(), tombstones.into())
42    }
43
44    /// Reveal the inner value as a shared reference.
45    pub fn as_reveal_ref(&self) -> (&Set, &TombstoneSet) {
46        (&self.set, &self.tombstones)
47    }
48
49    /// Reveal the inner value as an exclusive reference.
50    pub fn as_reveal_mut(&mut self) -> (&mut Set, &mut TombstoneSet) {
51        (&mut self.set, &mut self.tombstones)
52    }
53
54    /// Gets the inner by value, consuming self.
55    pub fn into_reveal(self) -> (Set, TombstoneSet) {
56        (self.set, self.tombstones)
57    }
58}
59
60// Merge implementation using TombstoneSet trait for optimized union operations
61impl<Item, SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther>
62    Merge<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
63    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
64where
65    SetSelf: Extend<Item> + Len + for<'a> Remove<&'a Item>,
66    SetOther: IntoIterator<Item = Item>,
67    TombstoneSetSelf: TombstoneSet<Item>,
68    TombstoneSetOther: IntoIterator<Item = Item>,
69{
70    fn merge(&mut self, other: SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> bool {
71        let old_set_len = self.set.len();
72        let old_tombstones_len = self.tombstones.len();
73
74        // Merge other set into self, don't include anything deleted by the current tombstone set.
75        self.set.extend(
76            other
77                .set
78                .into_iter()
79                .filter(|x| !self.tombstones.contains(x)),
80        );
81
82        // Combine the tombstone sets. Also need to remove any items in the remote tombstone set that currently exist in the local set.
83        self.tombstones
84            .extend(other.tombstones.into_iter().inspect(|x| {
85                self.set.remove(x);
86            }));
87
88        // if either there are new items in the real set, or the tombstone set increased
89        old_set_len < self.set.len() || old_tombstones_len < self.tombstones.len()
90    }
91}
92
93impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
94    LatticeFrom<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
95    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
96where
97    SetSelf: FromIterator<Item>,
98    SetOther: IntoIterator<Item = Item>,
99    TombstoneSetSelf: FromIterator<Item>,
100    TombstoneSetOther: IntoIterator<Item = Item>,
101{
102    fn lattice_from(other: SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> Self {
103        Self {
104            set: other.set.into_iter().collect(),
105            tombstones: other.tombstones.into_iter().collect(),
106        }
107    }
108}
109
110impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
111    PartialOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
112    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
113where
114    SetSelf: Set<Item, Item = Item> + Iter,
115    SetOther: Set<Item, Item = Item> + Iter,
116    TombstoneSetSelf: Set<Item, Item = Item> + Iter,
117    TombstoneSetOther: Set<Item, Item = Item> + Iter,
118{
119    fn partial_cmp(
120        &self,
121        other: &SetUnionWithTombstones<SetOther, TombstoneSetOther>,
122    ) -> Option<Ordering> {
123        fn set_cmp<I, A, B>(a: &A, b: &B) -> Option<Ordering>
124        where
125            A: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
126            B: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
127        {
128            match a.len().cmp(&b.len()) {
129                Less => {
130                    if a.iter().all(|key| b.contains(&*key)) {
131                        Some(Less)
132                    } else {
133                        None
134                    }
135                }
136                Equal => {
137                    if a.iter().all(|key| b.contains(&*key)) {
138                        Some(Equal)
139                    } else {
140                        None
141                    }
142                }
143                Greater => {
144                    if b.iter().all(|key| a.contains(&*key)) {
145                        Some(Greater)
146                    } else {
147                        None
148                    }
149                }
150            }
151        }
152
153        fn set_cmp_filter<I, A, B, C, D>(a: &A, b: &B, f1: &C, f2: &D) -> Option<Ordering>
154        where
155            A: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
156            B: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
157            C: for<'a> Get<&'a I>,
158            D: for<'a> Get<&'a I>,
159        {
160            let is_a_greater_than_b = a
161                .iter()
162                .filter(|key| !f2.contains(key))
163                .any(|key| !b.contains(&*key));
164
165            let is_b_greater_than_a = b
166                .iter()
167                .filter(|key| !f1.contains(key))
168                .any(|key| !a.contains(&*key));
169
170            match (is_a_greater_than_b, is_b_greater_than_a) {
171                (true, true) => None,
172                (true, false) => Some(Greater),
173                (false, true) => Some(Less),
174                (false, false) => Some(Equal),
175            }
176        }
177
178        match set_cmp(&self.tombstones, &other.tombstones) {
179            Some(Less) => {
180                match set_cmp_filter(&self.set, &other.set, &self.tombstones, &other.tombstones) {
181                    Some(Greater) => None,
182                    Some(Less) => Some(Less),
183                    Some(Equal) => Some(Less),
184                    None => None,
185                }
186            }
187            Some(Equal) => set_cmp(&self.set, &other.set),
188            Some(Greater) => {
189                match set_cmp_filter(&self.set, &other.set, &self.tombstones, &other.tombstones) {
190                    Some(Greater) => Some(Greater),
191                    Some(Equal) => Some(Greater),
192                    Some(Less) => None,
193                    None => None,
194                }
195            }
196            None => None,
197        }
198    }
199}
200impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther>
201    LatticeOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
202    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
203where
204    Self: PartialOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>,
205{
206}
207
208impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
209    PartialEq<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
210    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
211where
212    SetSelf: Set<Item, Item = Item> + Iter,
213    SetOther: Set<Item, Item = Item> + Iter,
214    TombstoneSetSelf: Set<Item, Item = Item> + Iter,
215    TombstoneSetOther: Set<Item, Item = Item> + Iter,
216{
217    fn eq(&self, other: &SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> bool {
218        if self.set.len() != other.set.len() || self.tombstones.len() != other.tombstones.len() {
219            return false;
220        }
221
222        self.set.iter().all(|key| other.set.contains(&*key))
223            && self
224                .tombstones
225                .iter()
226                .all(|key| other.tombstones.contains(&*key))
227    }
228}
229impl<SetSelf, TombstoneSetSelf> Eq for SetUnionWithTombstones<SetSelf, TombstoneSetSelf> where
230    Self: PartialEq
231{
232}
233
234impl<Set, TombstoneSet> IsBot for SetUnionWithTombstones<Set, TombstoneSet>
235where
236    Set: Len,
237    TombstoneSet: Len,
238{
239    fn is_bot(&self) -> bool {
240        self.set.is_empty() && self.tombstones.is_empty()
241    }
242}
243
244impl<Set, TombstoneSet> IsTop for SetUnionWithTombstones<Set, TombstoneSet> {
245    fn is_top(&self) -> bool {
246        false
247    }
248}
249
250/// [`std::collections::HashSet`]-backed [`SetUnionWithTombstones`] lattice.
251pub type SetUnionWithTombstonesHashSet<Item> = SetUnionWithTombstones<HashSet<Item>, HashSet<Item>>;
252
253/// [`std::collections::BTreeSet`]-backed [`SetUnionWithTombstones`] lattice.
254pub type SetUnionWithTombstonesBTreeSet<Item> =
255    SetUnionWithTombstones<BTreeSet<Item>, BTreeSet<Item>>;
256
257/// [`Vec`]-backed [`SetUnionWithTombstones`] lattice.
258pub type SetUnionWithTombstonesVec<Item> = SetUnionWithTombstones<Vec<Item>, Vec<Item>>;
259
260/// [`crate::collections::ArraySet`]-backed [`SetUnionWithTombstones`] lattice.
261pub type SetUnionWithTombstonesArray<Item, const N: usize> =
262    SetUnionWithTombstones<ArraySet<Item, N>, ArraySet<Item, N>>;
263
264/// [`crate::collections::SingletonSet`]-backed [`SetUnionWithTombstones`] lattice.
265pub type SetUnionWithTombstonesSingletonSet<Item> =
266    SetUnionWithTombstones<SingletonSet<Item>, SingletonSet<Item>>;
267
268/// [`Option`]-backed [`SetUnionWithTombstones`] lattice.
269pub type SetUnionWithTombstonesOptionSet<Item> =
270    SetUnionWithTombstones<OptionSet<Item>, OptionSet<Item>>;
271
272/// [`crate::collections::SingletonSet`]-backed [`SetUnionWithTombstones`] lattice.
273pub type SetUnionWithTombstonesTombstoneOnlySet<Item> =
274    SetUnionWithTombstones<EmptySet<Item>, SingletonSet<Item>>;
275
276/// [`crate::tombstone::RoaringTombstoneSet`]-backed tombstone set with [`std::collections::HashSet`] for the main set.
277/// Provides space-efficient tombstone storage for u64 integer keys.
278pub type SetUnionWithTombstonesRoaring = SetUnionWithTombstones<HashSet<u64>, RoaringTombstoneSet>;
279
280/// FST-backed tombstone set with [`std::collections::HashSet`] for the main set.
281/// Provides space-efficient, collision-free tombstone storage for String keys.
282pub type SetUnionWithTombstonesFstString =
283    SetUnionWithTombstones<HashSet<String>, FstTombstoneSet<String>>;
284
285#[cfg(test)]
286mod test {
287    use super::*;
288    use crate::test::check_all;
289
290    #[test]
291    fn delete_one() {
292        let mut x = SetUnionWithTombstonesHashSet::new_from([1], []);
293        let y = SetUnionWithTombstonesTombstoneOnlySet::new_from(EmptySet::default(), 1);
294
295        assert_eq!(x.partial_cmp(&y), Some(Less));
296
297        x.merge(y);
298
299        assert!(x.as_reveal_mut().1.contains(&1));
300    }
301
302    #[test]
303    fn test_specific_cases() {
304        assert_eq!(
305            SetUnionWithTombstonesHashSet::new_from([], [0])
306                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([0], [])),
307            Some(Greater),
308        );
309
310        assert_eq!(
311            SetUnionWithTombstonesHashSet::new_from([0], [1])
312                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
313            Some(Greater),
314        );
315
316        assert_eq!(
317            SetUnionWithTombstonesHashSet::new_from([], [0])
318                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
319            Some(Greater),
320        );
321
322        assert_eq!(
323            SetUnionWithTombstonesHashSet::new_from([], [0])
324                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
325            Some(Greater),
326        );
327    }
328
329    #[test]
330    fn consistency() {
331        check_all(&[
332            SetUnionWithTombstonesHashSet::new_from([], []),
333            SetUnionWithTombstonesHashSet::new_from([0], []),
334            SetUnionWithTombstonesHashSet::new_from([], [0]),
335            SetUnionWithTombstonesHashSet::new_from([1], []),
336            SetUnionWithTombstonesHashSet::new_from([], [1]),
337            SetUnionWithTombstonesHashSet::new_from([0, 1], []),
338            SetUnionWithTombstonesHashSet::new_from([], [0, 1]),
339            SetUnionWithTombstonesHashSet::new_from([0], [1]),
340            SetUnionWithTombstonesHashSet::new_from([1], [0]),
341        ]);
342    }
343
344    #[test]
345    fn roaring_basic() {
346        let mut x = SetUnionWithTombstonesRoaring::new_from(
347            HashSet::from([1, 2, 3]),
348            RoaringTombstoneSet::new(),
349        );
350        let mut y = SetUnionWithTombstonesRoaring::new_from(
351            HashSet::from([2, 3, 4]),
352            RoaringTombstoneSet::new(),
353        );
354
355        // Add tombstone for 2
356        y.as_reveal_mut().1.insert(2);
357
358        x.merge(y);
359
360        // Should have 1, 3, 4 (2 is tombstoned)
361        assert!(!x.as_reveal_ref().0.contains(&2));
362        assert!(x.as_reveal_ref().0.contains(&1));
363        assert!(x.as_reveal_ref().0.contains(&3));
364        assert!(x.as_reveal_ref().0.contains(&4));
365        assert!(x.as_reveal_ref().1.contains(&2));
366    }
367
368    #[test]
369    fn roaring_merge_efficiency() {
370        // Test that merging roaring bitmaps works correctly
371        let mut x = SetUnionWithTombstonesRoaring::new_from(
372            HashSet::from([1, 2, 3, 4, 5]),
373            RoaringTombstoneSet::new(),
374        );
375        x.as_reveal_mut().1.insert(10);
376        x.as_reveal_mut().1.insert(20);
377
378        let mut y = SetUnionWithTombstonesRoaring::new_from(
379            HashSet::from([6, 7, 8]),
380            RoaringTombstoneSet::new(),
381        );
382        y.as_reveal_mut().1.insert(30);
383        y.as_reveal_mut().1.insert(2); // Tombstone for 2
384
385        x.merge(y);
386
387        // Should have all tombstones
388        assert!(x.as_reveal_ref().1.contains(&10));
389        assert!(x.as_reveal_ref().1.contains(&20));
390        assert!(x.as_reveal_ref().1.contains(&30));
391        assert!(x.as_reveal_ref().1.contains(&2));
392
393        // Should not have 2 in the set
394        assert!(!x.as_reveal_ref().0.contains(&2));
395
396        // Should have all other items
397        assert!(x.as_reveal_ref().0.contains(&1));
398        assert!(x.as_reveal_ref().0.contains(&3));
399        assert!(x.as_reveal_ref().0.contains(&6));
400        assert!(x.as_reveal_ref().0.contains(&7));
401    }
402
403    #[test]
404    fn fst_string_basic() {
405        let mut x = SetUnionWithTombstonesFstString::new_from(
406            HashSet::from([
407                "apple".to_string(),
408                "banana".to_string(),
409                "cherry".to_string(),
410            ]),
411            FstTombstoneSet::new(),
412        );
413        let mut y = SetUnionWithTombstonesFstString::new_from(
414            HashSet::from(["banana".to_string(), "date".to_string()]),
415            FstTombstoneSet::new(),
416        );
417
418        // Add tombstone for "banana"
419        y.as_reveal_mut().1.extend(vec!["banana".to_string()]);
420
421        x.merge(y);
422
423        // Should have apple, cherry, date (banana is tombstoned)
424        assert!(!x.as_reveal_ref().0.contains("banana"));
425        assert!(x.as_reveal_ref().0.contains("apple"));
426        assert!(x.as_reveal_ref().0.contains("cherry"));
427        assert!(x.as_reveal_ref().0.contains("date"));
428        assert!(x.as_reveal_ref().1.contains(b"banana"));
429    }
430
431    #[test]
432    fn fst_merge_efficiency() {
433        // Test that FST union works correctly with multiple tombstones
434        let mut x = SetUnionWithTombstonesFstString::new_from(
435            HashSet::from([
436                "a".to_string(),
437                "b".to_string(),
438                "c".to_string(),
439                "d".to_string(),
440            ]),
441            FstTombstoneSet::from_iter(vec!["x".to_string(), "y".to_string()]),
442        );
443
444        let y = SetUnionWithTombstonesFstString::new_from(
445            HashSet::from(["e".to_string(), "f".to_string()]),
446            FstTombstoneSet::from_iter(vec!["z".to_string(), "b".to_string()]),
447        );
448
449        x.merge(y);
450
451        // Should have all tombstones
452        assert!(x.as_reveal_ref().1.contains(b"x"));
453        assert!(x.as_reveal_ref().1.contains(b"y"));
454        assert!(x.as_reveal_ref().1.contains(b"z"));
455        assert!(x.as_reveal_ref().1.contains(b"b"));
456
457        // Should not have "b" in the set
458        assert!(!x.as_reveal_ref().0.contains("b"));
459
460        // Should have all other items
461        assert!(x.as_reveal_ref().0.contains("a"));
462        assert!(x.as_reveal_ref().0.contains("c"));
463        assert!(x.as_reveal_ref().0.contains("d"));
464        assert!(x.as_reveal_ref().0.contains("e"));
465        assert!(x.as_reveal_ref().0.contains("f"));
466    }
467
468    #[test]
469    fn roaring_empty_merge() {
470        // Test merging empty sets
471        let mut x =
472            SetUnionWithTombstonesRoaring::new_from(HashSet::new(), RoaringTombstoneSet::new());
473        let y = SetUnionWithTombstonesRoaring::new_from(HashSet::new(), RoaringTombstoneSet::new());
474
475        assert!(!x.merge(y)); // Should return false (no change)
476        assert!(x.as_reveal_ref().0.is_empty());
477        assert_eq!(x.as_reveal_ref().1.len(), 0);
478    }
479
480    #[test]
481    fn roaring_idempotency() {
482        // Test that merging the same data twice doesn't change the result
483        let mut x = SetUnionWithTombstonesRoaring::new_from(
484            HashSet::from([1u64, 2, 3]),
485            RoaringTombstoneSet::new(),
486        );
487        let y = SetUnionWithTombstonesRoaring::new_from(
488            HashSet::from([2u64, 3, 4]),
489            RoaringTombstoneSet::from_iter(vec![2u64]),
490        );
491        let z = y.clone();
492
493        x.merge(y);
494        let first_result = x.clone();
495
496        x.merge(z);
497
498        // Should be identical after second merge
499        assert_eq!(x.as_reveal_ref().0, first_result.as_reveal_ref().0);
500        assert_eq!(
501            x.as_reveal_ref().1.len(),
502            first_result.as_reveal_ref().1.len()
503        );
504    }
505
506    #[test]
507    fn fst_commutativity() {
508        // Test that merge order doesn't matter
509        let a = SetUnionWithTombstonesFstString::new_from(
510            HashSet::from(["a".to_string(), "b".to_string()]),
511            FstTombstoneSet::from_iter(vec!["x".to_string()]),
512        );
513        let b = SetUnionWithTombstonesFstString::new_from(
514            HashSet::from(["c".to_string(), "d".to_string()]),
515            FstTombstoneSet::from_iter(vec!["y".to_string()]),
516        );
517
518        let mut x1 = a.clone();
519        let mut x2 = b.clone();
520
521        x1.merge(b.clone());
522        x2.merge(a.clone());
523
524        // Results should be the same regardless of merge order
525        assert_eq!(x1.as_reveal_ref().0, x2.as_reveal_ref().0);
526        assert_eq!(x1.as_reveal_ref().1.len(), x2.as_reveal_ref().1.len());
527    }
528}