Skip to main content

lattices/
map_union_with_tombstones.rs

1//! Module containing the [`MapUnionWithTombstones`] lattice and aliases for different datastructures.
2//!
3//! See [`crate::tombstone`] for documentation on choosing a tombstone implementation.
4
5use core::cmp::Ordering::{self, *};
6use core::fmt::Debug;
7#[cfg(feature = "std")]
8use std::collections::{HashMap, HashSet};
9#[cfg(feature = "std")]
10use std::string::String;
11
12#[cfg(feature = "std")]
13use cc_traits::Remove;
14use cc_traits::{Get, Iter, Len};
15
16#[cfg(feature = "std")]
17use crate::Merge;
18#[cfg(feature = "std")]
19use crate::cc_traits::GetMut;
20use crate::cc_traits::{Keyed, Map, MapIter, SimpleKeyedRef};
21use crate::collections::{EmptyMap, EmptySet, SingletonMap, SingletonSet};
22#[cfg(feature = "std")]
23use crate::tombstone::{FstTombstoneSet, RoaringTombstoneSet, TombstoneSet};
24use crate::{IsBot, IsTop, LatticeFrom, LatticeOrd};
25
26/// Map-union-with-tombstones compound lattice.
27///
28/// When a key is deleted from the map-union-with-tombstones lattice, it is removed from the underlying `map` and placed into
29/// the `tombstones` set.
30///
31/// This forms the first invariant for this data structure. A key should appear either nowhere, in `map` or in `tombstones`.
32/// but never in `map` and `tombstones` at the same time.
33///
34/// merging is done by merging the underlying `map` and then merging the `tombstones` set, then doing `map` = `map` - `tombstones`.
35///
36/// The implementation of `tombstones` can be any set-like thing. This allows a user to plug in their own set-like implementation.
37/// For example, if the user knows that keys will be created and deleted strictly sequentially, then they could create a highly optimized set implementation
38/// which would just be a single integer, correpsonding to the current key value that the set is up to. Queries for keys below that integer would return true,
39/// queries for keys above it would return false.
40#[derive(Copy, Clone, Debug, Default)]
41#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
42pub struct MapUnionWithTombstones<Map, TombstoneSet> {
43    map: Map,
44    tombstones: TombstoneSet,
45}
46
47impl<Map, TombstoneSet> MapUnionWithTombstones<Map, TombstoneSet> {
48    /// Create a new `MapUnionWithTombstones` from a `Map` and a `TombstoneSet`.
49    pub fn new(map: Map, tombstones: TombstoneSet) -> Self {
50        Self { map, tombstones }
51    }
52
53    /// Create a new `MapUnionWithTombstones` from an `Into<Map>` and an `Into<TombstoneSet>`.
54    pub fn new_from(map: impl Into<Map>, tombstones: impl Into<TombstoneSet>) -> Self {
55        Self::new(map.into(), tombstones.into())
56    }
57
58    /// Reveal the inner value as a shared reference.
59    pub fn as_reveal_ref(&self) -> (&Map, &TombstoneSet) {
60        (&self.map, &self.tombstones)
61    }
62
63    /// Reveal the inner value as an exclusive reference.
64    pub fn as_reveal_mut(&mut self) -> (&mut Map, &mut TombstoneSet) {
65        (&mut self.map, &mut self.tombstones)
66    }
67
68    /// Gets the inner by value, consuming self.
69    pub fn into_reveal(self) -> (Map, TombstoneSet) {
70        (self.map, self.tombstones)
71    }
72}
73
74/// Merge implementation using TombstoneSet trait for optimized union operations
75#[cfg(feature = "std")]
76impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
77    Merge<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
78    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
79where
80    MapSelf: Keyed<Key = K, Item = ValSelf>
81        + Extend<(K, ValSelf)>
82        + for<'a> GetMut<&'a K, Item = ValSelf>
83        + for<'b> Remove<&'b K>,
84    MapOther: IntoIterator<Item = (K, ValOther)>,
85    ValSelf: Merge<ValOther> + LatticeFrom<ValOther>,
86    ValOther: IsBot,
87    TombstoneSetSelf: TombstoneSet<K>,
88    TombstoneSetOther: IntoIterator<Item = K>,
89{
90    fn merge(&mut self, other: MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> bool {
91        use alloc::vec::Vec;
92
93        let mut changed = false;
94
95        // Collect other.tombstones into a vec to avoid borrowing issues
96        let other_tombstones: Vec<_> = other.tombstones.into_iter().collect();
97        let old_tombstones_len = self.tombstones.len();
98
99        // This vec collect is needed to prevent simultaneous mut references `self.0.extend` and
100        // `self.0.get_mut`.
101        // TODO(mingwei): This could be fixed with a different structure, maybe some sort of
102        // `Collection` entry API.
103        let iter: Vec<_> = other
104            .map
105            .into_iter()
106            .filter(|(k_other, val_other)| {
107                !val_other.is_bot() && !self.tombstones.contains(k_other)
108            })
109            .filter_map(|(k_other, val_other)| {
110                match self.map.get_mut(&k_other) {
111                    // Key collision, merge into `self`.
112                    Some(mut val_self) => {
113                        changed |= val_self.merge(val_other);
114                        None
115                    }
116                    // New value, convert for extending.
117                    None => {
118                        changed = true;
119                        Some((k_other, ValSelf::lattice_from(val_other)))
120                    }
121                }
122            })
123            .collect();
124        self.map.extend(iter);
125
126        // Extend tombstones and remove tombstoned keys from the map
127        self.tombstones
128            .extend(other_tombstones.into_iter().inspect(|k| {
129                self.map.remove(k);
130            }));
131
132        if old_tombstones_len != self.tombstones.len() {
133            changed = true;
134        }
135
136        changed
137    }
138}
139
140impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
141    LatticeFrom<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
142    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
143where
144    MapSelf: Keyed<Key = K, Item = ValSelf> + FromIterator<(K, ValSelf)>,
145    MapOther: IntoIterator<Item = (K, ValOther)>,
146    ValSelf: LatticeFrom<ValOther>,
147    TombstoneSetSelf: FromIterator<K>,
148    TombstoneSetOther: IntoIterator<Item = K>,
149{
150    fn lattice_from(other: MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> Self {
151        Self {
152            map: other
153                .map
154                .into_iter()
155                .map(|(k_other, val_other)| (k_other, LatticeFrom::lattice_from(val_other)))
156                .collect(),
157            tombstones: other.tombstones.into_iter().collect(),
158        }
159    }
160}
161
162impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
163    PartialOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
164    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
165where
166    MapSelf: Map<K, ValSelf, Key = K, Item = ValSelf> + MapIter + SimpleKeyedRef,
167    MapOther: Map<K, ValOther, Key = K, Item = ValOther> + MapIter + SimpleKeyedRef,
168    ValSelf: PartialOrd<ValOther> + IsBot,
169    ValOther: IsBot,
170    TombstoneSetSelf: Len + Iter<Item = K> + for<'a> Get<&'a K>,
171    TombstoneSetOther: Len + Iter<Item = K> + for<'a> Get<&'a K>,
172{
173    fn partial_cmp(
174        &self,
175        other: &MapUnionWithTombstones<MapOther, TombstoneSetOther>,
176    ) -> Option<Ordering> {
177        let self_tombstones_greater = self
178            .tombstones
179            .iter()
180            .any(|k| !other.tombstones.contains(&*k));
181
182        let other_tombstones_greater = other
183            .tombstones
184            .iter()
185            .any(|k| !self.tombstones.contains(&*k));
186
187        if self_tombstones_greater && other_tombstones_greater {
188            return None;
189        }
190
191        let mut self_any_greater = false;
192        let mut other_any_greater = false;
193        let self_keys = self
194            .map
195            .iter()
196            .filter(|(k, v)| {
197                !v.is_bot() && !self.tombstones.contains(k) && !other.tombstones.contains(k)
198            })
199            .map(|(k, _v)| <MapSelf as SimpleKeyedRef>::into_ref(k));
200        let other_keys = other
201            .map
202            .iter()
203            .filter(|(k, v)| {
204                !v.is_bot() && !self.tombstones.contains(k) && !other.tombstones.contains(k)
205            })
206            .map(|(k, _v)| <MapOther as SimpleKeyedRef>::into_ref(k));
207
208        for k in self_keys.chain(other_keys) {
209            match (self.map.get(k), other.map.get(k)) {
210                (Some(self_value), Some(other_value)) => {
211                    match self_value.partial_cmp(&*other_value)? {
212                        Less => {
213                            other_any_greater = true;
214                        }
215                        Greater => {
216                            self_any_greater = true;
217                        }
218                        Equal => {}
219                    }
220                }
221                (Some(_), None) => {
222                    self_any_greater = true;
223                }
224                (None, Some(_)) => {
225                    other_any_greater = true;
226                }
227                (None, None) => unreachable!(),
228            }
229            if self_any_greater && other_any_greater {
230                return None;
231            }
232        }
233        match (
234            self_any_greater,
235            other_any_greater,
236            self_tombstones_greater,
237            other_tombstones_greater,
238        ) {
239            (false, false, false, false) => Some(Equal),
240
241            (false, false, true, false) => Some(Greater),
242            (false, false, false, true) => Some(Less),
243
244            (true, false, false, false) => Some(Greater),
245            (false, true, false, false) => Some(Less),
246
247            (true, false, true, false) => Some(Greater),
248            (false, true, false, true) => Some(Less),
249
250            (true, false, false, true) => None,
251            (false, true, true, false) => None,
252
253            (true, true, _, _) => unreachable!(),
254            (_, _, true, true) => unreachable!(),
255        }
256    }
257}
258impl<MapSelf, MapOther, TombstoneSetSelf, TombstoneSetOther>
259    LatticeOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
260    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
261where
262    Self: PartialOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>,
263{
264}
265
266impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
267    PartialEq<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
268    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
269where
270    MapSelf: Map<K, ValSelf, Key = K, Item = ValSelf> + MapIter + SimpleKeyedRef,
271    MapOther: Map<K, ValOther, Key = K, Item = ValOther> + MapIter + SimpleKeyedRef,
272    ValSelf: PartialEq<ValOther> + IsBot,
273    ValOther: IsBot,
274    TombstoneSetSelf: Len + Iter<Item = K> + for<'a> Get<&'a K>,
275    TombstoneSetOther: Len + Iter<Item = K> + for<'b> Get<&'b K>,
276{
277    fn eq(&self, other: &MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> bool {
278        if self.tombstones.len() != other.tombstones.len() {
279            return false;
280        }
281
282        if self
283            .tombstones
284            .iter()
285            .any(|k| !other.tombstones.contains(&*k))
286        {
287            return false;
288        }
289
290        if other
291            .tombstones
292            .iter()
293            .any(|k| !self.tombstones.contains(&*k))
294        {
295            return false;
296        }
297
298        let self_keys = self
299            .map
300            .iter()
301            .filter(|(_k, v)| !v.is_bot())
302            .map(|(k, _v)| <MapSelf as SimpleKeyedRef>::into_ref(k));
303        let other_keys = other
304            .map
305            .iter()
306            .filter(|(_k, v)| !v.is_bot())
307            .map(|(k, _v)| <MapOther as SimpleKeyedRef>::into_ref(k));
308        for k in self_keys.chain(other_keys) {
309            match (self.map.get(k), other.map.get(k)) {
310                (Some(self_value), Some(other_value)) => {
311                    if *self_value != *other_value {
312                        return false;
313                    }
314                }
315                (None, None) => unreachable!(),
316                _ => {
317                    return false;
318                }
319            }
320        }
321
322        true
323    }
324}
325impl<MapSelf, TombstoneSetSelf> Eq for MapUnionWithTombstones<MapSelf, TombstoneSetSelf> where
326    Self: PartialEq
327{
328}
329
330impl<Map, TombstoneSet> IsBot for MapUnionWithTombstones<Map, TombstoneSet>
331where
332    Map: Iter,
333    Map::Item: IsBot,
334    TombstoneSet: Len,
335{
336    fn is_bot(&self) -> bool {
337        self.map.iter().all(|v| v.is_bot()) && self.tombstones.is_empty()
338    }
339}
340
341impl<Map, TombstoneSet> IsTop for MapUnionWithTombstones<Map, TombstoneSet> {
342    fn is_top(&self) -> bool {
343        false
344    }
345}
346
347/// [`std::collections::HashMap`]-backed [`MapUnionWithTombstones`] lattice.
348#[cfg(feature = "std")]
349pub type MapUnionHashMapWithTombstoneHashSet<K, Val> =
350    MapUnionWithTombstones<HashMap<K, Val>, HashSet<K>>;
351
352/// [`crate::collections::SingletonMap`]-backed [`MapUnionWithTombstones`] lattice.
353pub type MapUnionWithTombstonesSingletonMapOnly<K, Val> =
354    MapUnionWithTombstones<SingletonMap<K, Val>, EmptySet<K>>;
355
356/// [`crate::collections::SingletonSet`]-backed [`MapUnionWithTombstones`] lattice.
357pub type MapUnionWithTombstonesTombstoneSingletonSetOnly<K, Val> =
358    MapUnionWithTombstones<EmptyMap<K, Val>, SingletonSet<K>>;
359
360/// [`crate::tombstone::RoaringTombstoneSet`]-backed tombstone set with [`HashMap`] for the main map.
361/// Provides space-efficient tombstone storage for u64 integer keys.
362#[cfg(feature = "std")]
363pub type MapUnionWithTombstonesRoaring<Val> =
364    MapUnionWithTombstones<HashMap<u64, Val>, RoaringTombstoneSet>;
365
366/// FST-backed tombstone set with [`HashMap`] for the main map.
367/// Provides space-efficient, collision-free tombstone storage for String keys.
368#[cfg(feature = "std")]
369pub type MapUnionWithTombstonesFstString<Val> =
370    MapUnionWithTombstones<HashMap<String, Val>, FstTombstoneSet<String>>;
371
372#[cfg(test)]
373mod test {
374    use std::borrow::ToOwned;
375
376    use super::*;
377    use crate::NaiveLatticeOrd;
378    use crate::set_union::{SetUnion, SetUnionHashSet, SetUnionSingletonSet};
379    use crate::test::check_all;
380
381    #[test]
382    fn test_map_union() {
383        type K = &'static str;
384        type V = usize;
385
386        type M = MapUnionWithTombstones<HashMap<K, SetUnionHashSet<V>>, HashSet<K>>;
387        type S = MapUnionWithTombstones<SingletonMap<K, SetUnionSingletonSet<V>>, EmptySet<K>>;
388        type T = MapUnionWithTombstones<EmptyMap<K, SetUnion<EmptySet<V>>>, SingletonSet<K>>;
389
390        let mut my_map_a = M::default();
391        let my_map_b = S::new(
392            SingletonMap("hello", SetUnion::new(SingletonSet(100))),
393            Default::default(),
394        );
395
396        let my_map_c = T::new(Default::default(), SingletonSet("hello"));
397
398        my_map_a.merge(my_map_b);
399        my_map_a.merge(my_map_c);
400
401        assert!(!my_map_a.as_reveal_ref().0.contains_key("hello"));
402    }
403
404    #[test]
405    fn contrain1() {
406        type T = MapUnionWithTombstones<HashMap<i32, SetUnion<HashSet<i32>>>, HashSet<i32>>;
407
408        let a = T::new_from([], HashSet::from_iter([0]));
409        let b = T::new_from(
410            [(0, SetUnionHashSet::new_from([0]))],
411            HashSet::from_iter([]),
412        );
413
414        assert_eq!(a.naive_cmp(&b), Some(Greater));
415        assert_eq!(a.partial_cmp(&b), Some(Greater));
416
417        let a = T::new_from([], HashSet::from_iter([1]));
418        let b = T::new_from([(0, SetUnionHashSet::new_from([0]))], HashSet::default());
419
420        assert_eq!(a.naive_cmp(&b), None);
421        assert_eq!(a.partial_cmp(&b), None);
422    }
423
424    #[cfg(feature = "alloc")]
425    #[test]
426    fn consistency() {
427        use alloc::vec::Vec;
428
429        type K = &'static str;
430        type V = SetUnion<HashSet<i32>>;
431
432        type M = MapUnionWithTombstones<HashMap<K, V>, HashSet<K>>;
433
434        let mut test_vec = Vec::new();
435
436        #[rustfmt::skip]
437        {
438            test_vec.push(M::new_from([], HashSet::from_iter([])));
439
440            test_vec.push(M::new_from([], HashSet::from_iter(["a"])));
441            test_vec.push(M::new_from([], HashSet::from_iter(["b"])));
442            test_vec.push(M::new_from([], HashSet::from_iter(["a", "b"])));
443
444            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([]))], HashSet::from_iter([])));
445            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([0]))], HashSet::from_iter([])));
446            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([1]))], HashSet::from_iter([])));
447            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([0, 1]))], HashSet::from_iter([])));
448
449            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([]))], HashSet::from_iter([])));
450            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([0]))], HashSet::from_iter([])));
451            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([1]))], HashSet::from_iter([])));
452            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([0, 1]))], HashSet::from_iter([])));
453        };
454
455        check_all(&test_vec);
456    }
457
458    /// Check that a key with a value of bottom is the same as an empty map, etc.
459    #[test]
460    fn test_collapses_bot() {
461        type K = &'static str;
462        type V = SetUnion<HashSet<i32>>;
463
464        type A = MapUnionWithTombstones<HashMap<K, V>, HashSet<K>>;
465        type B = MapUnionWithTombstones<SingletonMap<K, V>, HashSet<K>>;
466
467        let map_empty = A::default();
468
469        let map_a_bot = B::new(SingletonMap("a", Default::default()), Default::default());
470        let map_b_bot = B::new(SingletonMap("b", Default::default()), Default::default());
471
472        assert_eq!(map_empty, map_a_bot);
473        assert_eq!(map_empty, map_b_bot);
474        assert_eq!(map_a_bot, map_b_bot);
475    }
476
477    #[test]
478    fn roaring_u64_basic() {
479        let mut x = MapUnionWithTombstonesRoaring::new_from(
480            HashMap::from([
481                (1u64, SetUnionHashSet::new_from([10])),
482                (2, SetUnionHashSet::new_from([20])),
483            ]),
484            RoaringTombstoneSet::new(),
485        );
486        let mut y = MapUnionWithTombstonesRoaring::new_from(
487            HashMap::from([
488                (2u64, SetUnionHashSet::new_from([21])),
489                (3, SetUnionHashSet::new_from([30])),
490            ]),
491            RoaringTombstoneSet::new(),
492        );
493
494        // Add tombstone for key 2
495        y.as_reveal_mut().1.insert(2);
496
497        x.merge(y);
498
499        // Should have keys 1 and 3, but not 2 (tombstoned)
500        assert!(!x.as_reveal_ref().0.contains_key(&2));
501        assert!(x.as_reveal_ref().0.contains_key(&1));
502        assert!(x.as_reveal_ref().0.contains_key(&3));
503        assert!(x.as_reveal_ref().1.contains(&2));
504    }
505
506    #[test]
507    fn fst_string_basic() {
508        let mut x = MapUnionWithTombstonesFstString::new_from(
509            HashMap::from([
510                ("apple".to_owned(), SetUnionHashSet::new_from([1])),
511                ("banana".to_owned(), SetUnionHashSet::new_from([2])),
512            ]),
513            FstTombstoneSet::new(),
514        );
515        let mut y = MapUnionWithTombstonesFstString::new_from(
516            HashMap::from([
517                ("banana".to_owned(), SetUnionHashSet::new_from([3])),
518                ("cherry".to_owned(), SetUnionHashSet::new_from([4])),
519            ]),
520            FstTombstoneSet::new(),
521        );
522
523        // Add tombstone for "banana"
524        y.as_reveal_mut().1.extend(["banana".to_owned()]);
525
526        x.merge(y);
527
528        // Should have "apple" and "cherry", but not "banana" (tombstoned)
529        assert!(!x.as_reveal_ref().0.contains_key("banana"));
530        assert!(x.as_reveal_ref().0.contains_key("apple"));
531        assert!(x.as_reveal_ref().0.contains_key("cherry"));
532        assert!(x.as_reveal_ref().1.contains(b"banana"));
533    }
534
535    #[test]
536    fn roaring_merge_efficiency() {
537        // Test that merging roaring bitmaps works correctly
538        let mut x = MapUnionWithTombstonesRoaring::new_from(
539            HashMap::from([
540                (1u64, SetUnionHashSet::new_from([1])),
541                (2, SetUnionHashSet::new_from([2])),
542            ]),
543            RoaringTombstoneSet::from_iter([10u64, 20]),
544        );
545
546        let y = MapUnionWithTombstonesRoaring::new_from(
547            HashMap::from([(3u64, SetUnionHashSet::new_from([3]))]),
548            RoaringTombstoneSet::from_iter([30u64, 2]),
549        );
550
551        x.merge(y);
552
553        // Should have all tombstones
554        assert!(x.as_reveal_ref().1.contains(&10));
555        assert!(x.as_reveal_ref().1.contains(&20));
556        assert!(x.as_reveal_ref().1.contains(&30));
557        assert!(x.as_reveal_ref().1.contains(&2));
558
559        // Should not have key 2 in the map
560        assert!(!x.as_reveal_ref().0.contains_key(&2));
561
562        // Should have keys 1 and 3
563        assert!(x.as_reveal_ref().0.contains_key(&1));
564        assert!(x.as_reveal_ref().0.contains_key(&3));
565    }
566}