Skip to main content

lattices/
set_union_with_tombstones.rs

1//! Module containing the [`SetUnionWithTombstones`] lattice and aliases for different datastructures.
2//!
3//! See [`crate::tombstone`] for documentation on choosing a tombstone implementation.
4
5#[cfg(feature = "alloc")]
6use alloc::collections::BTreeSet;
7use core::cmp::Ordering::{self, *};
8#[cfg(feature = "std")]
9use std::collections::HashSet;
10#[cfg(feature = "std")]
11use std::string::String;
12
13#[cfg(feature = "std")]
14use cc_traits::Remove;
15use cc_traits::{Collection, Get};
16
17#[cfg(feature = "std")]
18use crate::Merge;
19use crate::cc_traits::{Iter, Len, Set};
20use crate::collections::{ArraySet, EmptySet, OptionSet, SingletonSet};
21#[cfg(feature = "std")]
22use crate::tombstone::{FstTombstoneSet, RoaringTombstoneSet, TombstoneSet};
23use crate::{IsBot, IsTop, LatticeFrom, LatticeOrd};
24
25/// Set-union lattice with tombstones.
26///
27/// When an item is deleted from the SetUnionWithTombstones, it is removed from `set` and added to `tombstones`.
28/// This also is an invariant, if an item appears in `tombstones` it must not also be in `set`.
29///
30/// Merging set-union lattices is done by unioning the keys of both the (set and tombstone) sets,
31/// and then performing `set` = `set` - `tombstones`, to preserve the above invariant.
32///
33/// This implementation with two separate sets means that the actual set implementation can be decided
34/// for both the regular set and the tombstone set. This enables efficient storage strategies like using
35/// [`crate::tombstone::RoaringTombstoneSet`] for tombstones (see [`SetUnionWithTombstonesRoaring`]), which provides space-efficient
36/// bitmap compression for the tombstone set while keeping the main set flexible.
37#[derive(Default, Clone, Debug)]
38pub struct SetUnionWithTombstones<Set, TombstoneSet> {
39    set: Set,
40    tombstones: TombstoneSet,
41}
42
43impl<Set, TombstoneSet> SetUnionWithTombstones<Set, TombstoneSet> {
44    /// Create a new `SetUnionWithTombstones` from a `Set` and `TombstoneSet`.
45    pub fn new(set: Set, tombstones: TombstoneSet) -> Self {
46        Self { set, tombstones }
47    }
48
49    /// Create a new `SetUnionWithTombstones` from an `Into<Set>` and an `Into<TombstonesSet>`.
50    pub fn new_from(set: impl Into<Set>, tombstones: impl Into<TombstoneSet>) -> Self {
51        Self::new(set.into(), tombstones.into())
52    }
53
54    /// Reveal the inner value as a shared reference.
55    pub fn as_reveal_ref(&self) -> (&Set, &TombstoneSet) {
56        (&self.set, &self.tombstones)
57    }
58
59    /// Reveal the inner value as an exclusive reference.
60    pub fn as_reveal_mut(&mut self) -> (&mut Set, &mut TombstoneSet) {
61        (&mut self.set, &mut self.tombstones)
62    }
63
64    /// Gets the inner by value, consuming self.
65    pub fn into_reveal(self) -> (Set, TombstoneSet) {
66        (self.set, self.tombstones)
67    }
68}
69
70// Merge implementation using TombstoneSet trait for optimized union operations
71#[cfg(feature = "std")]
72impl<Item, SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther>
73    Merge<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
74    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
75where
76    SetSelf: Extend<Item> + Len + for<'a> Remove<&'a Item>,
77    SetOther: IntoIterator<Item = Item>,
78    TombstoneSetSelf: TombstoneSet<Item>,
79    TombstoneSetOther: IntoIterator<Item = Item>,
80{
81    fn merge(&mut self, other: SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> bool {
82        let old_set_len = self.set.len();
83        let old_tombstones_len = self.tombstones.len();
84
85        // Merge other set into self, don't include anything deleted by the current tombstone set.
86        self.set.extend(
87            other
88                .set
89                .into_iter()
90                .filter(|x| !self.tombstones.contains(x)),
91        );
92
93        // Combine the tombstone sets. Also need to remove any items in the remote tombstone set that currently exist in the local set.
94        self.tombstones
95            .extend(other.tombstones.into_iter().inspect(|x| {
96                self.set.remove(x);
97            }));
98
99        // if either there are new items in the real set, or the tombstone set increased
100        old_set_len < self.set.len() || old_tombstones_len < self.tombstones.len()
101    }
102}
103
104impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
105    LatticeFrom<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
106    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
107where
108    SetSelf: FromIterator<Item>,
109    SetOther: IntoIterator<Item = Item>,
110    TombstoneSetSelf: FromIterator<Item>,
111    TombstoneSetOther: IntoIterator<Item = Item>,
112{
113    fn lattice_from(other: SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> Self {
114        Self {
115            set: other.set.into_iter().collect(),
116            tombstones: other.tombstones.into_iter().collect(),
117        }
118    }
119}
120
121impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
122    PartialOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
123    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
124where
125    SetSelf: Set<Item, Item = Item> + Iter,
126    SetOther: Set<Item, Item = Item> + Iter,
127    TombstoneSetSelf: Set<Item, Item = Item> + Iter,
128    TombstoneSetOther: Set<Item, Item = Item> + Iter,
129{
130    fn partial_cmp(
131        &self,
132        other: &SetUnionWithTombstones<SetOther, TombstoneSetOther>,
133    ) -> Option<Ordering> {
134        fn set_cmp<I, A, B>(a: &A, b: &B) -> Option<Ordering>
135        where
136            A: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
137            B: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
138        {
139            match a.len().cmp(&b.len()) {
140                Less => {
141                    if a.iter().all(|key| b.contains(&*key)) {
142                        Some(Less)
143                    } else {
144                        None
145                    }
146                }
147                Equal => {
148                    if a.iter().all(|key| b.contains(&*key)) {
149                        Some(Equal)
150                    } else {
151                        None
152                    }
153                }
154                Greater => {
155                    if b.iter().all(|key| a.contains(&*key)) {
156                        Some(Greater)
157                    } else {
158                        None
159                    }
160                }
161            }
162        }
163
164        fn set_cmp_filter<I, A, B, C, D>(a: &A, b: &B, f1: &C, f2: &D) -> Option<Ordering>
165        where
166            A: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
167            B: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
168            C: for<'a> Get<&'a I>,
169            D: for<'a> Get<&'a I>,
170        {
171            let is_a_greater_than_b = a
172                .iter()
173                .filter(|key| !f2.contains(key))
174                .any(|key| !b.contains(&*key));
175
176            let is_b_greater_than_a = b
177                .iter()
178                .filter(|key| !f1.contains(key))
179                .any(|key| !a.contains(&*key));
180
181            match (is_a_greater_than_b, is_b_greater_than_a) {
182                (true, true) => None,
183                (true, false) => Some(Greater),
184                (false, true) => Some(Less),
185                (false, false) => Some(Equal),
186            }
187        }
188
189        match set_cmp(&self.tombstones, &other.tombstones) {
190            Some(Less) => {
191                match set_cmp_filter(&self.set, &other.set, &self.tombstones, &other.tombstones) {
192                    Some(Greater) => None,
193                    Some(Less) => Some(Less),
194                    Some(Equal) => Some(Less),
195                    None => None,
196                }
197            }
198            Some(Equal) => set_cmp(&self.set, &other.set),
199            Some(Greater) => {
200                match set_cmp_filter(&self.set, &other.set, &self.tombstones, &other.tombstones) {
201                    Some(Greater) => Some(Greater),
202                    Some(Equal) => Some(Greater),
203                    Some(Less) => None,
204                    None => None,
205                }
206            }
207            None => None,
208        }
209    }
210}
211impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther>
212    LatticeOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
213    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
214where
215    Self: PartialOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>,
216{
217}
218
219impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
220    PartialEq<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
221    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
222where
223    SetSelf: Set<Item, Item = Item> + Iter,
224    SetOther: Set<Item, Item = Item> + Iter,
225    TombstoneSetSelf: Set<Item, Item = Item> + Iter,
226    TombstoneSetOther: Set<Item, Item = Item> + Iter,
227{
228    fn eq(&self, other: &SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> bool {
229        if self.set.len() != other.set.len() || self.tombstones.len() != other.tombstones.len() {
230            return false;
231        }
232
233        self.set.iter().all(|key| other.set.contains(&*key))
234            && self
235                .tombstones
236                .iter()
237                .all(|key| other.tombstones.contains(&*key))
238    }
239}
240impl<SetSelf, TombstoneSetSelf> Eq for SetUnionWithTombstones<SetSelf, TombstoneSetSelf> where
241    Self: PartialEq
242{
243}
244
245impl<Set, TombstoneSet> IsBot for SetUnionWithTombstones<Set, TombstoneSet>
246where
247    Set: Len,
248    TombstoneSet: Len,
249{
250    fn is_bot(&self) -> bool {
251        self.set.is_empty() && self.tombstones.is_empty()
252    }
253}
254
255impl<Set, TombstoneSet> IsTop for SetUnionWithTombstones<Set, TombstoneSet> {
256    fn is_top(&self) -> bool {
257        false
258    }
259}
260
261/// [`std::collections::HashSet`]-backed [`SetUnionWithTombstones`] lattice.
262#[cfg(feature = "std")]
263pub type SetUnionWithTombstonesHashSet<Item> = SetUnionWithTombstones<HashSet<Item>, HashSet<Item>>;
264
265/// [`std::collections::BTreeSet`]-backed [`SetUnionWithTombstones`] lattice.
266#[cfg(feature = "alloc")]
267pub type SetUnionWithTombstonesBTreeSet<Item> =
268    SetUnionWithTombstones<BTreeSet<Item>, BTreeSet<Item>>;
269
270/// [`Vec`](alloc::vec::Vec)-backed [`SetUnionWithTombstones`] lattice.
271#[cfg(feature = "alloc")]
272pub type SetUnionWithTombstonesVec<Item> =
273    SetUnionWithTombstones<alloc::vec::Vec<Item>, alloc::vec::Vec<Item>>;
274
275/// [`crate::collections::ArraySet`]-backed [`SetUnionWithTombstones`] lattice.
276pub type SetUnionWithTombstonesArray<Item, const N: usize> =
277    SetUnionWithTombstones<ArraySet<Item, N>, ArraySet<Item, N>>;
278
279/// [`crate::collections::SingletonSet`]-backed [`SetUnionWithTombstones`] lattice.
280pub type SetUnionWithTombstonesSingletonSet<Item> =
281    SetUnionWithTombstones<SingletonSet<Item>, SingletonSet<Item>>;
282
283/// [`Option`]-backed [`SetUnionWithTombstones`] lattice.
284pub type SetUnionWithTombstonesOptionSet<Item> =
285    SetUnionWithTombstones<OptionSet<Item>, OptionSet<Item>>;
286
287/// [`crate::collections::SingletonSet`]-backed [`SetUnionWithTombstones`] lattice.
288pub type SetUnionWithTombstonesTombstoneOnlySet<Item> =
289    SetUnionWithTombstones<EmptySet<Item>, SingletonSet<Item>>;
290
291/// [`crate::tombstone::RoaringTombstoneSet`]-backed tombstone set with [`std::collections::HashSet`] for the main set.
292/// Provides space-efficient tombstone storage for u64 integer keys.
293#[cfg(feature = "std")]
294pub type SetUnionWithTombstonesRoaring = SetUnionWithTombstones<HashSet<u64>, RoaringTombstoneSet>;
295
296/// FST-backed tombstone set with [`std::collections::HashSet`] for the main set.
297/// Provides space-efficient, collision-free tombstone storage for String keys.
298#[cfg(feature = "std")]
299pub type SetUnionWithTombstonesFstString =
300    SetUnionWithTombstones<HashSet<String>, FstTombstoneSet<String>>;
301
302#[cfg(test)]
303mod test {
304    use std::borrow::ToOwned;
305
306    use super::*;
307    use crate::test::check_all;
308
309    #[test]
310    fn delete_one() {
311        let mut x = SetUnionWithTombstonesHashSet::new_from([1], []);
312        let y = SetUnionWithTombstonesTombstoneOnlySet::new_from(EmptySet::default(), 1);
313
314        assert_eq!(x.partial_cmp(&y), Some(Less));
315
316        x.merge(y);
317
318        assert!(x.as_reveal_mut().1.contains(&1));
319    }
320
321    #[test]
322    fn test_specific_cases() {
323        assert_eq!(
324            SetUnionWithTombstonesHashSet::new_from([], [0])
325                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([0], [])),
326            Some(Greater),
327        );
328
329        assert_eq!(
330            SetUnionWithTombstonesHashSet::new_from([0], [1])
331                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
332            Some(Greater),
333        );
334
335        assert_eq!(
336            SetUnionWithTombstonesHashSet::new_from([], [0])
337                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
338            Some(Greater),
339        );
340
341        assert_eq!(
342            SetUnionWithTombstonesHashSet::new_from([], [0])
343                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
344            Some(Greater),
345        );
346    }
347
348    #[test]
349    fn consistency() {
350        check_all(&[
351            SetUnionWithTombstonesHashSet::new_from([], []),
352            SetUnionWithTombstonesHashSet::new_from([0], []),
353            SetUnionWithTombstonesHashSet::new_from([], [0]),
354            SetUnionWithTombstonesHashSet::new_from([1], []),
355            SetUnionWithTombstonesHashSet::new_from([], [1]),
356            SetUnionWithTombstonesHashSet::new_from([0, 1], []),
357            SetUnionWithTombstonesHashSet::new_from([], [0, 1]),
358            SetUnionWithTombstonesHashSet::new_from([0], [1]),
359            SetUnionWithTombstonesHashSet::new_from([1], [0]),
360        ]);
361    }
362
363    #[test]
364    fn roaring_basic() {
365        let mut x = SetUnionWithTombstonesRoaring::new_from(
366            HashSet::from([1, 2, 3]),
367            RoaringTombstoneSet::new(),
368        );
369        let mut y = SetUnionWithTombstonesRoaring::new_from(
370            HashSet::from([2, 3, 4]),
371            RoaringTombstoneSet::new(),
372        );
373
374        // Add tombstone for 2
375        y.as_reveal_mut().1.insert(2);
376
377        x.merge(y);
378
379        // Should have 1, 3, 4 (2 is tombstoned)
380        assert!(!x.as_reveal_ref().0.contains(&2));
381        assert!(x.as_reveal_ref().0.contains(&1));
382        assert!(x.as_reveal_ref().0.contains(&3));
383        assert!(x.as_reveal_ref().0.contains(&4));
384        assert!(x.as_reveal_ref().1.contains(&2));
385    }
386
387    #[test]
388    fn roaring_merge_efficiency() {
389        // Test that merging roaring bitmaps works correctly
390        let mut x = SetUnionWithTombstonesRoaring::new_from(
391            HashSet::from([1, 2, 3, 4, 5]),
392            RoaringTombstoneSet::new(),
393        );
394        x.as_reveal_mut().1.insert(10);
395        x.as_reveal_mut().1.insert(20);
396
397        let mut y = SetUnionWithTombstonesRoaring::new_from(
398            HashSet::from([6, 7, 8]),
399            RoaringTombstoneSet::new(),
400        );
401        y.as_reveal_mut().1.insert(30);
402        y.as_reveal_mut().1.insert(2); // Tombstone for 2
403
404        x.merge(y);
405
406        // Should have all tombstones
407        assert!(x.as_reveal_ref().1.contains(&10));
408        assert!(x.as_reveal_ref().1.contains(&20));
409        assert!(x.as_reveal_ref().1.contains(&30));
410        assert!(x.as_reveal_ref().1.contains(&2));
411
412        // Should not have 2 in the set
413        assert!(!x.as_reveal_ref().0.contains(&2));
414
415        // Should have all other items
416        assert!(x.as_reveal_ref().0.contains(&1));
417        assert!(x.as_reveal_ref().0.contains(&3));
418        assert!(x.as_reveal_ref().0.contains(&6));
419        assert!(x.as_reveal_ref().0.contains(&7));
420    }
421
422    #[test]
423    fn fst_string_basic() {
424        let mut x = SetUnionWithTombstonesFstString::new_from(
425            HashSet::from(["apple".to_owned(), "banana".to_owned(), "cherry".to_owned()]),
426            FstTombstoneSet::new(),
427        );
428        let mut y = SetUnionWithTombstonesFstString::new_from(
429            HashSet::from(["banana".to_owned(), "date".to_owned()]),
430            FstTombstoneSet::new(),
431        );
432
433        // Add tombstone for "banana"
434        y.as_reveal_mut().1.extend(["banana".to_owned()]);
435
436        x.merge(y);
437
438        // Should have apple, cherry, date (banana is tombstoned)
439        assert!(!x.as_reveal_ref().0.contains("banana"));
440        assert!(x.as_reveal_ref().0.contains("apple"));
441        assert!(x.as_reveal_ref().0.contains("cherry"));
442        assert!(x.as_reveal_ref().0.contains("date"));
443        assert!(x.as_reveal_ref().1.contains(b"banana"));
444    }
445
446    #[test]
447    fn fst_merge_efficiency() {
448        // Test that FST union works correctly with multiple tombstones
449        let mut x = SetUnionWithTombstonesFstString::new_from(
450            HashSet::from([
451                "a".to_owned(),
452                "b".to_owned(),
453                "c".to_owned(),
454                "d".to_owned(),
455            ]),
456            FstTombstoneSet::from_iter(["x".to_owned(), "y".to_owned()]),
457        );
458
459        let y = SetUnionWithTombstonesFstString::new_from(
460            HashSet::from(["e".to_owned(), "f".to_owned()]),
461            FstTombstoneSet::from_iter(["z".to_owned(), "b".to_owned()]),
462        );
463
464        x.merge(y);
465
466        // Should have all tombstones
467        assert!(x.as_reveal_ref().1.contains(b"x"));
468        assert!(x.as_reveal_ref().1.contains(b"y"));
469        assert!(x.as_reveal_ref().1.contains(b"z"));
470        assert!(x.as_reveal_ref().1.contains(b"b"));
471
472        // Should not have "b" in the set
473        assert!(!x.as_reveal_ref().0.contains("b"));
474
475        // Should have all other items
476        assert!(x.as_reveal_ref().0.contains("a"));
477        assert!(x.as_reveal_ref().0.contains("c"));
478        assert!(x.as_reveal_ref().0.contains("d"));
479        assert!(x.as_reveal_ref().0.contains("e"));
480        assert!(x.as_reveal_ref().0.contains("f"));
481    }
482
483    #[test]
484    fn roaring_empty_merge() {
485        // Test merging empty sets
486        let mut x =
487            SetUnionWithTombstonesRoaring::new_from(HashSet::new(), RoaringTombstoneSet::new());
488        let y = SetUnionWithTombstonesRoaring::new_from(HashSet::new(), RoaringTombstoneSet::new());
489
490        assert!(!x.merge(y)); // Should return false (no change)
491        assert!(x.as_reveal_ref().0.is_empty());
492        assert_eq!(x.as_reveal_ref().1.len(), 0);
493    }
494
495    #[test]
496    fn roaring_idempotency() {
497        // Test that merging the same data twice doesn't change the result
498        let mut x = SetUnionWithTombstonesRoaring::new_from(
499            HashSet::from([1u64, 2, 3]),
500            RoaringTombstoneSet::new(),
501        );
502        let y = SetUnionWithTombstonesRoaring::new_from(
503            HashSet::from([2u64, 3, 4]),
504            RoaringTombstoneSet::from_iter([2u64]),
505        );
506        let z = y.clone();
507
508        x.merge(y);
509        let first_result = x.clone();
510
511        x.merge(z);
512
513        // Should be identical after second merge
514        assert_eq!(x.as_reveal_ref().0, first_result.as_reveal_ref().0);
515        assert_eq!(
516            x.as_reveal_ref().1.len(),
517            first_result.as_reveal_ref().1.len()
518        );
519    }
520
521    #[test]
522    fn fst_commutativity() {
523        // Test that merge order doesn't matter
524        let a = SetUnionWithTombstonesFstString::new_from(
525            HashSet::from(["a".to_owned(), "b".to_owned()]),
526            FstTombstoneSet::from_iter(["x".to_owned()]),
527        );
528        let b = SetUnionWithTombstonesFstString::new_from(
529            HashSet::from(["c".to_owned(), "d".to_owned()]),
530            FstTombstoneSet::from_iter(["y".to_owned()]),
531        );
532
533        let mut x1 = a.clone();
534        let mut x2 = b.clone();
535
536        x1.merge(b);
537        x2.merge(a);
538
539        // Results should be the same regardless of merge order
540        assert_eq!(x1.as_reveal_ref().0, x2.as_reveal_ref().0);
541        assert_eq!(x1.as_reveal_ref().1.len(), x2.as_reveal_ref().1.len());
542    }
543}