Skip to main content

lattices/
map_union_with_tombstones.rs

1//! Module containing the [`MapUnionWithTombstones`] lattice and aliases for different datastructures.
2//!
3//! See [`crate::tombstone`] for documentation on choosing a tombstone implementation.
4
5use core::cmp::Ordering::{self, *};
6use core::fmt::Debug;
7#[cfg(feature = "std")]
8use std::collections::{HashMap, HashSet};
9#[cfg(feature = "std")]
10use std::string::String;
11
12use cc_traits::{Get, Iter, Len, Remove};
13
14use crate::cc_traits::{GetMut, Keyed, Map, MapIter, SimpleKeyedRef};
15use crate::collections::{EmptyMap, EmptySet, SingletonMap, SingletonSet};
16#[cfg(feature = "std")]
17use crate::tombstone::{FstTombstoneSet, RoaringTombstoneSet, TombstoneSet};
18use crate::{IsBot, IsTop, LatticeFrom, LatticeOrd, Merge};
19
20/// Map-union-with-tombstones compound lattice.
21///
22/// When a key is deleted from the map-union-with-tombstones lattice, it is removed from the underlying `map` and placed into
23/// the `tombstones` set.
24///
25/// This forms the first invariant for this data structure. A key should appear either nowhere, in `map` or in `tombstones`.
26/// but never in `map` and `tombstones` at the same time.
27///
28/// merging is done by merging the underlying `map` and then merging the `tombstones` set, then doing `map` = `map` - `tombstones`.
29///
30/// The implementation of `tombstones` can be any set-like thing. This allows a user to plug in their own set-like implementation.
31/// For example, if the user knows that keys will be created and deleted strictly sequentially, then they could create a highly optimized set implementation
32/// which would just be a single integer, correpsonding to the current key value that the set is up to. Queries for keys below that integer would return true,
33/// queries for keys above it would return false.
34#[derive(Copy, Clone, Debug, Default)]
35#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
36pub struct MapUnionWithTombstones<Map, TombstoneSet> {
37    map: Map,
38    tombstones: TombstoneSet,
39}
40
41impl<Map, TombstoneSet> MapUnionWithTombstones<Map, TombstoneSet> {
42    /// Create a new `MapUnionWithTombstones` from a `Map` and a `TombstoneSet`.
43    pub fn new(map: Map, tombstones: TombstoneSet) -> Self {
44        Self { map, tombstones }
45    }
46
47    /// Create a new `MapUnionWithTombstones` from an `Into<Map>` and an `Into<TombstoneSet>`.
48    pub fn new_from(map: impl Into<Map>, tombstones: impl Into<TombstoneSet>) -> Self {
49        Self::new(map.into(), tombstones.into())
50    }
51
52    /// Reveal the inner value as a shared reference.
53    pub fn as_reveal_ref(&self) -> (&Map, &TombstoneSet) {
54        (&self.map, &self.tombstones)
55    }
56
57    /// Reveal the inner value as an exclusive reference.
58    pub fn as_reveal_mut(&mut self) -> (&mut Map, &mut TombstoneSet) {
59        (&mut self.map, &mut self.tombstones)
60    }
61
62    /// Gets the inner by value, consuming self.
63    pub fn into_reveal(self) -> (Map, TombstoneSet) {
64        (self.map, self.tombstones)
65    }
66}
67
68/// Merge implementation using TombstoneSet trait for optimized union operations
69#[cfg(feature = "std")]
70impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
71    Merge<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
72    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
73where
74    MapSelf: Keyed<Key = K, Item = ValSelf>
75        + Extend<(K, ValSelf)>
76        + for<'a> GetMut<&'a K, Item = ValSelf>
77        + for<'b> Remove<&'b K>,
78    MapOther: IntoIterator<Item = (K, ValOther)>,
79    ValSelf: Merge<ValOther> + LatticeFrom<ValOther>,
80    ValOther: IsBot,
81    TombstoneSetSelf: TombstoneSet<K>,
82    TombstoneSetOther: IntoIterator<Item = K>,
83{
84    fn merge(&mut self, other: MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> bool {
85        use alloc::vec::Vec;
86
87        let mut changed = false;
88
89        // Collect other.tombstones into a vec to avoid borrowing issues
90        let other_tombstones: Vec<_> = other.tombstones.into_iter().collect();
91        let old_tombstones_len = self.tombstones.len();
92
93        // This vec collect is needed to prevent simultaneous mut references `self.0.extend` and
94        // `self.0.get_mut`.
95        // TODO(mingwei): This could be fixed with a different structure, maybe some sort of
96        // `Collection` entry API.
97        let iter: Vec<_> = other
98            .map
99            .into_iter()
100            .filter(|(k_other, val_other)| {
101                !val_other.is_bot() && !self.tombstones.contains(k_other)
102            })
103            .filter_map(|(k_other, val_other)| {
104                match self.map.get_mut(&k_other) {
105                    // Key collision, merge into `self`.
106                    Some(mut val_self) => {
107                        changed |= val_self.merge(val_other);
108                        None
109                    }
110                    // New value, convert for extending.
111                    None => {
112                        changed = true;
113                        Some((k_other, ValSelf::lattice_from(val_other)))
114                    }
115                }
116            })
117            .collect();
118        self.map.extend(iter);
119
120        // Extend tombstones and remove tombstoned keys from the map
121        self.tombstones
122            .extend(other_tombstones.into_iter().inspect(|k| {
123                self.map.remove(k);
124            }));
125
126        if old_tombstones_len != self.tombstones.len() {
127            changed = true;
128        }
129
130        changed
131    }
132}
133
134impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
135    LatticeFrom<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
136    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
137where
138    MapSelf: Keyed<Key = K, Item = ValSelf> + FromIterator<(K, ValSelf)>,
139    MapOther: IntoIterator<Item = (K, ValOther)>,
140    ValSelf: LatticeFrom<ValOther>,
141    TombstoneSetSelf: FromIterator<K>,
142    TombstoneSetOther: IntoIterator<Item = K>,
143{
144    fn lattice_from(other: MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> Self {
145        Self {
146            map: other
147                .map
148                .into_iter()
149                .map(|(k_other, val_other)| (k_other, LatticeFrom::lattice_from(val_other)))
150                .collect(),
151            tombstones: other.tombstones.into_iter().collect(),
152        }
153    }
154}
155
156impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
157    PartialOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
158    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
159where
160    MapSelf: Map<K, ValSelf, Key = K, Item = ValSelf> + MapIter + SimpleKeyedRef,
161    MapOther: Map<K, ValOther, Key = K, Item = ValOther> + MapIter + SimpleKeyedRef,
162    ValSelf: PartialOrd<ValOther> + IsBot,
163    ValOther: IsBot,
164    TombstoneSetSelf: Len + Iter<Item = K> + for<'a> Get<&'a K>,
165    TombstoneSetOther: Len + Iter<Item = K> + for<'a> Get<&'a K>,
166{
167    fn partial_cmp(
168        &self,
169        other: &MapUnionWithTombstones<MapOther, TombstoneSetOther>,
170    ) -> Option<Ordering> {
171        let self_tombstones_greater = self
172            .tombstones
173            .iter()
174            .any(|k| !other.tombstones.contains(&*k));
175
176        let other_tombstones_greater = other
177            .tombstones
178            .iter()
179            .any(|k| !self.tombstones.contains(&*k));
180
181        if self_tombstones_greater && other_tombstones_greater {
182            return None;
183        }
184
185        let mut self_any_greater = false;
186        let mut other_any_greater = false;
187        let self_keys = self
188            .map
189            .iter()
190            .filter(|(k, v)| {
191                !v.is_bot() && !self.tombstones.contains(k) && !other.tombstones.contains(k)
192            })
193            .map(|(k, _v)| <MapSelf as SimpleKeyedRef>::into_ref(k));
194        let other_keys = other
195            .map
196            .iter()
197            .filter(|(k, v)| {
198                !v.is_bot() && !self.tombstones.contains(k) && !other.tombstones.contains(k)
199            })
200            .map(|(k, _v)| <MapOther as SimpleKeyedRef>::into_ref(k));
201
202        for k in self_keys.chain(other_keys) {
203            match (self.map.get(k), other.map.get(k)) {
204                (Some(self_value), Some(other_value)) => {
205                    match self_value.partial_cmp(&*other_value)? {
206                        Less => {
207                            other_any_greater = true;
208                        }
209                        Greater => {
210                            self_any_greater = true;
211                        }
212                        Equal => {}
213                    }
214                }
215                (Some(_), None) => {
216                    self_any_greater = true;
217                }
218                (None, Some(_)) => {
219                    other_any_greater = true;
220                }
221                (None, None) => unreachable!(),
222            }
223            if self_any_greater && other_any_greater {
224                return None;
225            }
226        }
227        match (
228            self_any_greater,
229            other_any_greater,
230            self_tombstones_greater,
231            other_tombstones_greater,
232        ) {
233            (false, false, false, false) => Some(Equal),
234
235            (false, false, true, false) => Some(Greater),
236            (false, false, false, true) => Some(Less),
237
238            (true, false, false, false) => Some(Greater),
239            (false, true, false, false) => Some(Less),
240
241            (true, false, true, false) => Some(Greater),
242            (false, true, false, true) => Some(Less),
243
244            (true, false, false, true) => None,
245            (false, true, true, false) => None,
246
247            (true, true, _, _) => unreachable!(),
248            (_, _, true, true) => unreachable!(),
249        }
250    }
251}
252impl<MapSelf, MapOther, TombstoneSetSelf, TombstoneSetOther>
253    LatticeOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
254    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
255where
256    Self: PartialOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>,
257{
258}
259
260impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
261    PartialEq<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
262    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
263where
264    MapSelf: Map<K, ValSelf, Key = K, Item = ValSelf> + MapIter + SimpleKeyedRef,
265    MapOther: Map<K, ValOther, Key = K, Item = ValOther> + MapIter + SimpleKeyedRef,
266    ValSelf: PartialEq<ValOther> + IsBot,
267    ValOther: IsBot,
268    TombstoneSetSelf: Len + Iter<Item = K> + for<'a> Get<&'a K>,
269    TombstoneSetOther: Len + Iter<Item = K> + for<'b> Get<&'b K>,
270{
271    fn eq(&self, other: &MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> bool {
272        if self.tombstones.len() != other.tombstones.len() {
273            return false;
274        }
275
276        if self
277            .tombstones
278            .iter()
279            .any(|k| !other.tombstones.contains(&*k))
280        {
281            return false;
282        }
283
284        if other
285            .tombstones
286            .iter()
287            .any(|k| !self.tombstones.contains(&*k))
288        {
289            return false;
290        }
291
292        let self_keys = self
293            .map
294            .iter()
295            .filter(|(_k, v)| !v.is_bot())
296            .map(|(k, _v)| <MapSelf as SimpleKeyedRef>::into_ref(k));
297        let other_keys = other
298            .map
299            .iter()
300            .filter(|(_k, v)| !v.is_bot())
301            .map(|(k, _v)| <MapOther as SimpleKeyedRef>::into_ref(k));
302        for k in self_keys.chain(other_keys) {
303            match (self.map.get(k), other.map.get(k)) {
304                (Some(self_value), Some(other_value)) => {
305                    if *self_value != *other_value {
306                        return false;
307                    }
308                }
309                (None, None) => unreachable!(),
310                _ => {
311                    return false;
312                }
313            }
314        }
315
316        true
317    }
318}
319impl<MapSelf, TombstoneSetSelf> Eq for MapUnionWithTombstones<MapSelf, TombstoneSetSelf> where
320    Self: PartialEq
321{
322}
323
324impl<Map, TombstoneSet> IsBot for MapUnionWithTombstones<Map, TombstoneSet>
325where
326    Map: Iter,
327    Map::Item: IsBot,
328    TombstoneSet: Len,
329{
330    fn is_bot(&self) -> bool {
331        self.map.iter().all(|v| v.is_bot()) && self.tombstones.is_empty()
332    }
333}
334
335impl<Map, TombstoneSet> IsTop for MapUnionWithTombstones<Map, TombstoneSet> {
336    fn is_top(&self) -> bool {
337        false
338    }
339}
340
341/// [`std::collections::HashMap`]-backed [`MapUnionWithTombstones`] lattice.
342#[cfg(feature = "std")]
343pub type MapUnionHashMapWithTombstoneHashSet<K, Val> =
344    MapUnionWithTombstones<HashMap<K, Val>, HashSet<K>>;
345
346/// [`crate::collections::SingletonMap`]-backed [`MapUnionWithTombstones`] lattice.
347pub type MapUnionWithTombstonesSingletonMapOnly<K, Val> =
348    MapUnionWithTombstones<SingletonMap<K, Val>, EmptySet<K>>;
349
350/// [`crate::collections::SingletonSet`]-backed [`MapUnionWithTombstones`] lattice.
351pub type MapUnionWithTombstonesTombstoneSingletonSetOnly<K, Val> =
352    MapUnionWithTombstones<EmptyMap<K, Val>, SingletonSet<K>>;
353
354/// [`crate::tombstone::RoaringTombstoneSet`]-backed tombstone set with [`HashMap`] for the main map.
355/// Provides space-efficient tombstone storage for u64 integer keys.
356#[cfg(feature = "std")]
357pub type MapUnionWithTombstonesRoaring<Val> =
358    MapUnionWithTombstones<HashMap<u64, Val>, RoaringTombstoneSet>;
359
360/// FST-backed tombstone set with [`HashMap`] for the main map.
361/// Provides space-efficient, collision-free tombstone storage for String keys.
362#[cfg(feature = "std")]
363pub type MapUnionWithTombstonesFstString<Val> =
364    MapUnionWithTombstones<HashMap<String, Val>, FstTombstoneSet<String>>;
365
366#[cfg(test)]
367mod test {
368    use std::borrow::ToOwned;
369
370    use super::*;
371    use crate::NaiveLatticeOrd;
372    use crate::set_union::{SetUnion, SetUnionHashSet, SetUnionSingletonSet};
373    use crate::test::check_all;
374
375    #[test]
376    fn test_map_union() {
377        type K = &'static str;
378        type V = usize;
379
380        type M = MapUnionWithTombstones<HashMap<K, SetUnionHashSet<V>>, HashSet<K>>;
381        type S = MapUnionWithTombstones<SingletonMap<K, SetUnionSingletonSet<V>>, EmptySet<K>>;
382        type T = MapUnionWithTombstones<EmptyMap<K, SetUnion<EmptySet<V>>>, SingletonSet<K>>;
383
384        let mut my_map_a = M::default();
385        let my_map_b = S::new(
386            SingletonMap("hello", SetUnion::new(SingletonSet(100))),
387            Default::default(),
388        );
389
390        let my_map_c = T::new(Default::default(), SingletonSet("hello"));
391
392        my_map_a.merge(my_map_b);
393        my_map_a.merge(my_map_c);
394
395        assert!(!my_map_a.as_reveal_ref().0.contains_key("hello"));
396    }
397
398    #[test]
399    fn contrain1() {
400        type T = MapUnionWithTombstones<HashMap<i32, SetUnion<HashSet<i32>>>, HashSet<i32>>;
401
402        let a = T::new_from([], HashSet::from_iter([0]));
403        let b = T::new_from(
404            [(0, SetUnionHashSet::new_from([0]))],
405            HashSet::from_iter([]),
406        );
407
408        assert_eq!(a.naive_cmp(&b), Some(Greater));
409        assert_eq!(a.partial_cmp(&b), Some(Greater));
410
411        let a = T::new_from([], HashSet::from_iter([1]));
412        let b = T::new_from([(0, SetUnionHashSet::new_from([0]))], HashSet::default());
413
414        assert_eq!(a.naive_cmp(&b), None);
415        assert_eq!(a.partial_cmp(&b), None);
416    }
417
418    #[cfg(feature = "alloc")]
419    #[test]
420    fn consistency() {
421        use alloc::vec::Vec;
422
423        type K = &'static str;
424        type V = SetUnion<HashSet<i32>>;
425
426        type M = MapUnionWithTombstones<HashMap<K, V>, HashSet<K>>;
427
428        let mut test_vec = Vec::new();
429
430        #[rustfmt::skip]
431        {
432            test_vec.push(M::new_from([], HashSet::from_iter([])));
433
434            test_vec.push(M::new_from([], HashSet::from_iter(["a"])));
435            test_vec.push(M::new_from([], HashSet::from_iter(["b"])));
436            test_vec.push(M::new_from([], HashSet::from_iter(["a", "b"])));
437
438            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([]))], HashSet::from_iter([])));
439            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([0]))], HashSet::from_iter([])));
440            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([1]))], HashSet::from_iter([])));
441            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([0, 1]))], HashSet::from_iter([])));
442
443            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([]))], HashSet::from_iter([])));
444            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([0]))], HashSet::from_iter([])));
445            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([1]))], HashSet::from_iter([])));
446            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([0, 1]))], HashSet::from_iter([])));
447        };
448
449        check_all(&test_vec);
450    }
451
452    /// Check that a key with a value of bottom is the same as an empty map, etc.
453    #[test]
454    fn test_collapses_bot() {
455        type K = &'static str;
456        type V = SetUnion<HashSet<i32>>;
457
458        type A = MapUnionWithTombstones<HashMap<K, V>, HashSet<K>>;
459        type B = MapUnionWithTombstones<SingletonMap<K, V>, HashSet<K>>;
460
461        let map_empty = A::default();
462
463        let map_a_bot = B::new(SingletonMap("a", Default::default()), Default::default());
464        let map_b_bot = B::new(SingletonMap("b", Default::default()), Default::default());
465
466        assert_eq!(map_empty, map_a_bot);
467        assert_eq!(map_empty, map_b_bot);
468        assert_eq!(map_a_bot, map_b_bot);
469    }
470
471    #[test]
472    fn roaring_u64_basic() {
473        let mut x = MapUnionWithTombstonesRoaring::new_from(
474            HashMap::from([
475                (1u64, SetUnionHashSet::new_from([10])),
476                (2, SetUnionHashSet::new_from([20])),
477            ]),
478            RoaringTombstoneSet::new(),
479        );
480        let mut y = MapUnionWithTombstonesRoaring::new_from(
481            HashMap::from([
482                (2u64, SetUnionHashSet::new_from([21])),
483                (3, SetUnionHashSet::new_from([30])),
484            ]),
485            RoaringTombstoneSet::new(),
486        );
487
488        // Add tombstone for key 2
489        y.as_reveal_mut().1.insert(2);
490
491        x.merge(y);
492
493        // Should have keys 1 and 3, but not 2 (tombstoned)
494        assert!(!x.as_reveal_ref().0.contains_key(&2));
495        assert!(x.as_reveal_ref().0.contains_key(&1));
496        assert!(x.as_reveal_ref().0.contains_key(&3));
497        assert!(x.as_reveal_ref().1.contains(&2));
498    }
499
500    #[test]
501    fn fst_string_basic() {
502        let mut x = MapUnionWithTombstonesFstString::new_from(
503            HashMap::from([
504                ("apple".to_owned(), SetUnionHashSet::new_from([1])),
505                ("banana".to_owned(), SetUnionHashSet::new_from([2])),
506            ]),
507            FstTombstoneSet::new(),
508        );
509        let mut y = MapUnionWithTombstonesFstString::new_from(
510            HashMap::from([
511                ("banana".to_owned(), SetUnionHashSet::new_from([3])),
512                ("cherry".to_owned(), SetUnionHashSet::new_from([4])),
513            ]),
514            FstTombstoneSet::new(),
515        );
516
517        // Add tombstone for "banana"
518        y.as_reveal_mut().1.extend(["banana".to_owned()]);
519
520        x.merge(y);
521
522        // Should have "apple" and "cherry", but not "banana" (tombstoned)
523        assert!(!x.as_reveal_ref().0.contains_key("banana"));
524        assert!(x.as_reveal_ref().0.contains_key("apple"));
525        assert!(x.as_reveal_ref().0.contains_key("cherry"));
526        assert!(x.as_reveal_ref().1.contains(b"banana"));
527    }
528
529    #[test]
530    fn roaring_merge_efficiency() {
531        // Test that merging roaring bitmaps works correctly
532        let mut x = MapUnionWithTombstonesRoaring::new_from(
533            HashMap::from([
534                (1u64, SetUnionHashSet::new_from([1])),
535                (2, SetUnionHashSet::new_from([2])),
536            ]),
537            RoaringTombstoneSet::from_iter([10u64, 20]),
538        );
539
540        let y = MapUnionWithTombstonesRoaring::new_from(
541            HashMap::from([(3u64, SetUnionHashSet::new_from([3]))]),
542            RoaringTombstoneSet::from_iter([30u64, 2]),
543        );
544
545        x.merge(y);
546
547        // Should have all tombstones
548        assert!(x.as_reveal_ref().1.contains(&10));
549        assert!(x.as_reveal_ref().1.contains(&20));
550        assert!(x.as_reveal_ref().1.contains(&30));
551        assert!(x.as_reveal_ref().1.contains(&2));
552
553        // Should not have key 2 in the map
554        assert!(!x.as_reveal_ref().0.contains_key(&2));
555
556        // Should have keys 1 and 3
557        assert!(x.as_reveal_ref().0.contains_key(&1));
558        assert!(x.as_reveal_ref().0.contains_key(&3));
559    }
560}