Skip to main content

lattices/
set_union_with_tombstones.rs

1//! Module containing the [`SetUnionWithTombstones`] lattice and aliases for different datastructures.
2//!
3//! See [`crate::tombstone`] for documentation on choosing a tombstone implementation.
4
5#[cfg(feature = "alloc")]
6use alloc::collections::BTreeSet;
7use core::cmp::Ordering::{self, *};
8#[cfg(feature = "std")]
9use std::collections::HashSet;
10#[cfg(feature = "std")]
11use std::string::String;
12
13use cc_traits::{Collection, Get, Remove};
14
15use crate::cc_traits::{Iter, Len, Set};
16use crate::collections::{ArraySet, EmptySet, OptionSet, SingletonSet};
17#[cfg(feature = "std")]
18use crate::tombstone::{FstTombstoneSet, RoaringTombstoneSet, TombstoneSet};
19use crate::{IsBot, IsTop, LatticeFrom, LatticeOrd, Merge};
20
21/// Set-union lattice with tombstones.
22///
23/// When an item is deleted from the SetUnionWithTombstones, it is removed from `set` and added to `tombstones`.
24/// This also is an invariant, if an item appears in `tombstones` it must not also be in `set`.
25///
26/// Merging set-union lattices is done by unioning the keys of both the (set and tombstone) sets,
27/// and then performing `set` = `set` - `tombstones`, to preserve the above invariant.
28///
29/// This implementation with two separate sets means that the actual set implementation can be decided
30/// for both the regular set and the tombstone set. This enables efficient storage strategies like using
31/// [`crate::tombstone::RoaringTombstoneSet`] for tombstones (see [`SetUnionWithTombstonesRoaring`]), which provides space-efficient
32/// bitmap compression for the tombstone set while keeping the main set flexible.
33#[derive(Default, Clone, Debug)]
34pub struct SetUnionWithTombstones<Set, TombstoneSet> {
35    set: Set,
36    tombstones: TombstoneSet,
37}
38
39impl<Set, TombstoneSet> SetUnionWithTombstones<Set, TombstoneSet> {
40    /// Create a new `SetUnionWithTombstones` from a `Set` and `TombstoneSet`.
41    pub fn new(set: Set, tombstones: TombstoneSet) -> Self {
42        Self { set, tombstones }
43    }
44
45    /// Create a new `SetUnionWithTombstones` from an `Into<Set>` and an `Into<TombstonesSet>`.
46    pub fn new_from(set: impl Into<Set>, tombstones: impl Into<TombstoneSet>) -> Self {
47        Self::new(set.into(), tombstones.into())
48    }
49
50    /// Reveal the inner value as a shared reference.
51    pub fn as_reveal_ref(&self) -> (&Set, &TombstoneSet) {
52        (&self.set, &self.tombstones)
53    }
54
55    /// Reveal the inner value as an exclusive reference.
56    pub fn as_reveal_mut(&mut self) -> (&mut Set, &mut TombstoneSet) {
57        (&mut self.set, &mut self.tombstones)
58    }
59
60    /// Gets the inner by value, consuming self.
61    pub fn into_reveal(self) -> (Set, TombstoneSet) {
62        (self.set, self.tombstones)
63    }
64}
65
66// Merge implementation using TombstoneSet trait for optimized union operations
67#[cfg(feature = "std")]
68impl<Item, SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther>
69    Merge<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
70    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
71where
72    SetSelf: Extend<Item> + Len + for<'a> Remove<&'a Item>,
73    SetOther: IntoIterator<Item = Item>,
74    TombstoneSetSelf: TombstoneSet<Item>,
75    TombstoneSetOther: IntoIterator<Item = Item>,
76{
77    fn merge(&mut self, other: SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> bool {
78        let old_set_len = self.set.len();
79        let old_tombstones_len = self.tombstones.len();
80
81        // Merge other set into self, don't include anything deleted by the current tombstone set.
82        self.set.extend(
83            other
84                .set
85                .into_iter()
86                .filter(|x| !self.tombstones.contains(x)),
87        );
88
89        // Combine the tombstone sets. Also need to remove any items in the remote tombstone set that currently exist in the local set.
90        self.tombstones
91            .extend(other.tombstones.into_iter().inspect(|x| {
92                self.set.remove(x);
93            }));
94
95        // if either there are new items in the real set, or the tombstone set increased
96        old_set_len < self.set.len() || old_tombstones_len < self.tombstones.len()
97    }
98}
99
100impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
101    LatticeFrom<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
102    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
103where
104    SetSelf: FromIterator<Item>,
105    SetOther: IntoIterator<Item = Item>,
106    TombstoneSetSelf: FromIterator<Item>,
107    TombstoneSetOther: IntoIterator<Item = Item>,
108{
109    fn lattice_from(other: SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> Self {
110        Self {
111            set: other.set.into_iter().collect(),
112            tombstones: other.tombstones.into_iter().collect(),
113        }
114    }
115}
116
117impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
118    PartialOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
119    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
120where
121    SetSelf: Set<Item, Item = Item> + Iter,
122    SetOther: Set<Item, Item = Item> + Iter,
123    TombstoneSetSelf: Set<Item, Item = Item> + Iter,
124    TombstoneSetOther: Set<Item, Item = Item> + Iter,
125{
126    fn partial_cmp(
127        &self,
128        other: &SetUnionWithTombstones<SetOther, TombstoneSetOther>,
129    ) -> Option<Ordering> {
130        fn set_cmp<I, A, B>(a: &A, b: &B) -> Option<Ordering>
131        where
132            A: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
133            B: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
134        {
135            match a.len().cmp(&b.len()) {
136                Less => {
137                    if a.iter().all(|key| b.contains(&*key)) {
138                        Some(Less)
139                    } else {
140                        None
141                    }
142                }
143                Equal => {
144                    if a.iter().all(|key| b.contains(&*key)) {
145                        Some(Equal)
146                    } else {
147                        None
148                    }
149                }
150                Greater => {
151                    if b.iter().all(|key| a.contains(&*key)) {
152                        Some(Greater)
153                    } else {
154                        None
155                    }
156                }
157            }
158        }
159
160        fn set_cmp_filter<I, A, B, C, D>(a: &A, b: &B, f1: &C, f2: &D) -> Option<Ordering>
161        where
162            A: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
163            B: Collection<Item = I> + Iter + for<'a> Get<&'a I> + Len,
164            C: for<'a> Get<&'a I>,
165            D: for<'a> Get<&'a I>,
166        {
167            let is_a_greater_than_b = a
168                .iter()
169                .filter(|key| !f2.contains(key))
170                .any(|key| !b.contains(&*key));
171
172            let is_b_greater_than_a = b
173                .iter()
174                .filter(|key| !f1.contains(key))
175                .any(|key| !a.contains(&*key));
176
177            match (is_a_greater_than_b, is_b_greater_than_a) {
178                (true, true) => None,
179                (true, false) => Some(Greater),
180                (false, true) => Some(Less),
181                (false, false) => Some(Equal),
182            }
183        }
184
185        match set_cmp(&self.tombstones, &other.tombstones) {
186            Some(Less) => {
187                match set_cmp_filter(&self.set, &other.set, &self.tombstones, &other.tombstones) {
188                    Some(Greater) => None,
189                    Some(Less) => Some(Less),
190                    Some(Equal) => Some(Less),
191                    None => None,
192                }
193            }
194            Some(Equal) => set_cmp(&self.set, &other.set),
195            Some(Greater) => {
196                match set_cmp_filter(&self.set, &other.set, &self.tombstones, &other.tombstones) {
197                    Some(Greater) => Some(Greater),
198                    Some(Equal) => Some(Greater),
199                    Some(Less) => None,
200                    None => None,
201                }
202            }
203            None => None,
204        }
205    }
206}
207impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther>
208    LatticeOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
209    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
210where
211    Self: PartialOrd<SetUnionWithTombstones<SetOther, TombstoneSetOther>>,
212{
213}
214
215impl<SetSelf, TombstoneSetSelf, SetOther, TombstoneSetOther, Item>
216    PartialEq<SetUnionWithTombstones<SetOther, TombstoneSetOther>>
217    for SetUnionWithTombstones<SetSelf, TombstoneSetSelf>
218where
219    SetSelf: Set<Item, Item = Item> + Iter,
220    SetOther: Set<Item, Item = Item> + Iter,
221    TombstoneSetSelf: Set<Item, Item = Item> + Iter,
222    TombstoneSetOther: Set<Item, Item = Item> + Iter,
223{
224    fn eq(&self, other: &SetUnionWithTombstones<SetOther, TombstoneSetOther>) -> bool {
225        if self.set.len() != other.set.len() || self.tombstones.len() != other.tombstones.len() {
226            return false;
227        }
228
229        self.set.iter().all(|key| other.set.contains(&*key))
230            && self
231                .tombstones
232                .iter()
233                .all(|key| other.tombstones.contains(&*key))
234    }
235}
236impl<SetSelf, TombstoneSetSelf> Eq for SetUnionWithTombstones<SetSelf, TombstoneSetSelf> where
237    Self: PartialEq
238{
239}
240
241impl<Set, TombstoneSet> IsBot for SetUnionWithTombstones<Set, TombstoneSet>
242where
243    Set: Len,
244    TombstoneSet: Len,
245{
246    fn is_bot(&self) -> bool {
247        self.set.is_empty() && self.tombstones.is_empty()
248    }
249}
250
251impl<Set, TombstoneSet> IsTop for SetUnionWithTombstones<Set, TombstoneSet> {
252    fn is_top(&self) -> bool {
253        false
254    }
255}
256
257/// [`std::collections::HashSet`]-backed [`SetUnionWithTombstones`] lattice.
258#[cfg(feature = "std")]
259pub type SetUnionWithTombstonesHashSet<Item> = SetUnionWithTombstones<HashSet<Item>, HashSet<Item>>;
260
261/// [`std::collections::BTreeSet`]-backed [`SetUnionWithTombstones`] lattice.
262#[cfg(feature = "alloc")]
263pub type SetUnionWithTombstonesBTreeSet<Item> =
264    SetUnionWithTombstones<BTreeSet<Item>, BTreeSet<Item>>;
265
266/// [`Vec`](alloc::vec::Vec)-backed [`SetUnionWithTombstones`] lattice.
267#[cfg(feature = "alloc")]
268pub type SetUnionWithTombstonesVec<Item> =
269    SetUnionWithTombstones<alloc::vec::Vec<Item>, alloc::vec::Vec<Item>>;
270
271/// [`crate::collections::ArraySet`]-backed [`SetUnionWithTombstones`] lattice.
272pub type SetUnionWithTombstonesArray<Item, const N: usize> =
273    SetUnionWithTombstones<ArraySet<Item, N>, ArraySet<Item, N>>;
274
275/// [`crate::collections::SingletonSet`]-backed [`SetUnionWithTombstones`] lattice.
276pub type SetUnionWithTombstonesSingletonSet<Item> =
277    SetUnionWithTombstones<SingletonSet<Item>, SingletonSet<Item>>;
278
279/// [`Option`]-backed [`SetUnionWithTombstones`] lattice.
280pub type SetUnionWithTombstonesOptionSet<Item> =
281    SetUnionWithTombstones<OptionSet<Item>, OptionSet<Item>>;
282
283/// [`crate::collections::SingletonSet`]-backed [`SetUnionWithTombstones`] lattice.
284pub type SetUnionWithTombstonesTombstoneOnlySet<Item> =
285    SetUnionWithTombstones<EmptySet<Item>, SingletonSet<Item>>;
286
287/// [`crate::tombstone::RoaringTombstoneSet`]-backed tombstone set with [`std::collections::HashSet`] for the main set.
288/// Provides space-efficient tombstone storage for u64 integer keys.
289#[cfg(feature = "std")]
290pub type SetUnionWithTombstonesRoaring = SetUnionWithTombstones<HashSet<u64>, RoaringTombstoneSet>;
291
292/// FST-backed tombstone set with [`std::collections::HashSet`] for the main set.
293/// Provides space-efficient, collision-free tombstone storage for String keys.
294#[cfg(feature = "std")]
295pub type SetUnionWithTombstonesFstString =
296    SetUnionWithTombstones<HashSet<String>, FstTombstoneSet<String>>;
297
298#[cfg(test)]
299mod test {
300    use std::borrow::ToOwned;
301
302    use super::*;
303    use crate::test::check_all;
304
305    #[test]
306    fn delete_one() {
307        let mut x = SetUnionWithTombstonesHashSet::new_from([1], []);
308        let y = SetUnionWithTombstonesTombstoneOnlySet::new_from(EmptySet::default(), 1);
309
310        assert_eq!(x.partial_cmp(&y), Some(Less));
311
312        x.merge(y);
313
314        assert!(x.as_reveal_mut().1.contains(&1));
315    }
316
317    #[test]
318    fn test_specific_cases() {
319        assert_eq!(
320            SetUnionWithTombstonesHashSet::new_from([], [0])
321                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([0], [])),
322            Some(Greater),
323        );
324
325        assert_eq!(
326            SetUnionWithTombstonesHashSet::new_from([0], [1])
327                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
328            Some(Greater),
329        );
330
331        assert_eq!(
332            SetUnionWithTombstonesHashSet::new_from([], [0])
333                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
334            Some(Greater),
335        );
336
337        assert_eq!(
338            SetUnionWithTombstonesHashSet::new_from([], [0])
339                .partial_cmp(&SetUnionWithTombstonesHashSet::new_from([], [])),
340            Some(Greater),
341        );
342    }
343
344    #[test]
345    fn consistency() {
346        check_all(&[
347            SetUnionWithTombstonesHashSet::new_from([], []),
348            SetUnionWithTombstonesHashSet::new_from([0], []),
349            SetUnionWithTombstonesHashSet::new_from([], [0]),
350            SetUnionWithTombstonesHashSet::new_from([1], []),
351            SetUnionWithTombstonesHashSet::new_from([], [1]),
352            SetUnionWithTombstonesHashSet::new_from([0, 1], []),
353            SetUnionWithTombstonesHashSet::new_from([], [0, 1]),
354            SetUnionWithTombstonesHashSet::new_from([0], [1]),
355            SetUnionWithTombstonesHashSet::new_from([1], [0]),
356        ]);
357    }
358
359    #[test]
360    fn roaring_basic() {
361        let mut x = SetUnionWithTombstonesRoaring::new_from(
362            HashSet::from([1, 2, 3]),
363            RoaringTombstoneSet::new(),
364        );
365        let mut y = SetUnionWithTombstonesRoaring::new_from(
366            HashSet::from([2, 3, 4]),
367            RoaringTombstoneSet::new(),
368        );
369
370        // Add tombstone for 2
371        y.as_reveal_mut().1.insert(2);
372
373        x.merge(y);
374
375        // Should have 1, 3, 4 (2 is tombstoned)
376        assert!(!x.as_reveal_ref().0.contains(&2));
377        assert!(x.as_reveal_ref().0.contains(&1));
378        assert!(x.as_reveal_ref().0.contains(&3));
379        assert!(x.as_reveal_ref().0.contains(&4));
380        assert!(x.as_reveal_ref().1.contains(&2));
381    }
382
383    #[test]
384    fn roaring_merge_efficiency() {
385        // Test that merging roaring bitmaps works correctly
386        let mut x = SetUnionWithTombstonesRoaring::new_from(
387            HashSet::from([1, 2, 3, 4, 5]),
388            RoaringTombstoneSet::new(),
389        );
390        x.as_reveal_mut().1.insert(10);
391        x.as_reveal_mut().1.insert(20);
392
393        let mut y = SetUnionWithTombstonesRoaring::new_from(
394            HashSet::from([6, 7, 8]),
395            RoaringTombstoneSet::new(),
396        );
397        y.as_reveal_mut().1.insert(30);
398        y.as_reveal_mut().1.insert(2); // Tombstone for 2
399
400        x.merge(y);
401
402        // Should have all tombstones
403        assert!(x.as_reveal_ref().1.contains(&10));
404        assert!(x.as_reveal_ref().1.contains(&20));
405        assert!(x.as_reveal_ref().1.contains(&30));
406        assert!(x.as_reveal_ref().1.contains(&2));
407
408        // Should not have 2 in the set
409        assert!(!x.as_reveal_ref().0.contains(&2));
410
411        // Should have all other items
412        assert!(x.as_reveal_ref().0.contains(&1));
413        assert!(x.as_reveal_ref().0.contains(&3));
414        assert!(x.as_reveal_ref().0.contains(&6));
415        assert!(x.as_reveal_ref().0.contains(&7));
416    }
417
418    #[test]
419    fn fst_string_basic() {
420        let mut x = SetUnionWithTombstonesFstString::new_from(
421            HashSet::from(["apple".to_owned(), "banana".to_owned(), "cherry".to_owned()]),
422            FstTombstoneSet::new(),
423        );
424        let mut y = SetUnionWithTombstonesFstString::new_from(
425            HashSet::from(["banana".to_owned(), "date".to_owned()]),
426            FstTombstoneSet::new(),
427        );
428
429        // Add tombstone for "banana"
430        y.as_reveal_mut().1.extend(["banana".to_owned()]);
431
432        x.merge(y);
433
434        // Should have apple, cherry, date (banana is tombstoned)
435        assert!(!x.as_reveal_ref().0.contains("banana"));
436        assert!(x.as_reveal_ref().0.contains("apple"));
437        assert!(x.as_reveal_ref().0.contains("cherry"));
438        assert!(x.as_reveal_ref().0.contains("date"));
439        assert!(x.as_reveal_ref().1.contains(b"banana"));
440    }
441
442    #[test]
443    fn fst_merge_efficiency() {
444        // Test that FST union works correctly with multiple tombstones
445        let mut x = SetUnionWithTombstonesFstString::new_from(
446            HashSet::from([
447                "a".to_owned(),
448                "b".to_owned(),
449                "c".to_owned(),
450                "d".to_owned(),
451            ]),
452            FstTombstoneSet::from_iter(["x".to_owned(), "y".to_owned()]),
453        );
454
455        let y = SetUnionWithTombstonesFstString::new_from(
456            HashSet::from(["e".to_owned(), "f".to_owned()]),
457            FstTombstoneSet::from_iter(["z".to_owned(), "b".to_owned()]),
458        );
459
460        x.merge(y);
461
462        // Should have all tombstones
463        assert!(x.as_reveal_ref().1.contains(b"x"));
464        assert!(x.as_reveal_ref().1.contains(b"y"));
465        assert!(x.as_reveal_ref().1.contains(b"z"));
466        assert!(x.as_reveal_ref().1.contains(b"b"));
467
468        // Should not have "b" in the set
469        assert!(!x.as_reveal_ref().0.contains("b"));
470
471        // Should have all other items
472        assert!(x.as_reveal_ref().0.contains("a"));
473        assert!(x.as_reveal_ref().0.contains("c"));
474        assert!(x.as_reveal_ref().0.contains("d"));
475        assert!(x.as_reveal_ref().0.contains("e"));
476        assert!(x.as_reveal_ref().0.contains("f"));
477    }
478
479    #[test]
480    fn roaring_empty_merge() {
481        // Test merging empty sets
482        let mut x =
483            SetUnionWithTombstonesRoaring::new_from(HashSet::new(), RoaringTombstoneSet::new());
484        let y = SetUnionWithTombstonesRoaring::new_from(HashSet::new(), RoaringTombstoneSet::new());
485
486        assert!(!x.merge(y)); // Should return false (no change)
487        assert!(x.as_reveal_ref().0.is_empty());
488        assert_eq!(x.as_reveal_ref().1.len(), 0);
489    }
490
491    #[test]
492    fn roaring_idempotency() {
493        // Test that merging the same data twice doesn't change the result
494        let mut x = SetUnionWithTombstonesRoaring::new_from(
495            HashSet::from([1u64, 2, 3]),
496            RoaringTombstoneSet::new(),
497        );
498        let y = SetUnionWithTombstonesRoaring::new_from(
499            HashSet::from([2u64, 3, 4]),
500            RoaringTombstoneSet::from_iter([2u64]),
501        );
502        let z = y.clone();
503
504        x.merge(y);
505        let first_result = x.clone();
506
507        x.merge(z);
508
509        // Should be identical after second merge
510        assert_eq!(x.as_reveal_ref().0, first_result.as_reveal_ref().0);
511        assert_eq!(
512            x.as_reveal_ref().1.len(),
513            first_result.as_reveal_ref().1.len()
514        );
515    }
516
517    #[test]
518    fn fst_commutativity() {
519        // Test that merge order doesn't matter
520        let a = SetUnionWithTombstonesFstString::new_from(
521            HashSet::from(["a".to_owned(), "b".to_owned()]),
522            FstTombstoneSet::from_iter(["x".to_owned()]),
523        );
524        let b = SetUnionWithTombstonesFstString::new_from(
525            HashSet::from(["c".to_owned(), "d".to_owned()]),
526            FstTombstoneSet::from_iter(["y".to_owned()]),
527        );
528
529        let mut x1 = a.clone();
530        let mut x2 = b.clone();
531
532        x1.merge(b);
533        x2.merge(a);
534
535        // Results should be the same regardless of merge order
536        assert_eq!(x1.as_reveal_ref().0, x2.as_reveal_ref().0);
537        assert_eq!(x1.as_reveal_ref().1.len(), x2.as_reveal_ref().1.len());
538    }
539}