lattices/
map_union_with_tombstones.rs

1//! Module containing the [`MapUnionWithTombstones`] lattice and aliases for different datastructures.
2//!
3//! See [`crate::tombstone`] for documentation on choosing a tombstone implementation.
4
5use std::cmp::Ordering::{self, *};
6use std::collections::{HashMap, HashSet};
7use std::fmt::Debug;
8
9use cc_traits::{Get, Iter, Len, Remove};
10
11use crate::cc_traits::{GetMut, Keyed, Map, MapIter, SimpleKeyedRef};
12use crate::collections::{EmptyMap, EmptySet, SingletonMap, SingletonSet};
13use crate::tombstone::{FstTombstoneSet, RoaringTombstoneSet, TombstoneSet};
14use crate::{IsBot, IsTop, LatticeFrom, LatticeOrd, Merge};
15
16/// Map-union-with-tombstones compound lattice.
17///
18/// When a key is deleted from the map-union-with-tombstones lattice, it is removed from the underlying `map` and placed into
19/// the `tombstones` set.
20///
21/// This forms the first invariant for this data structure. A key should appear either nowhere, in `map` or in `tombstones`.
22/// but never in `map` and `tombstones` at the same time.
23///
24/// merging is done by merging the underlying `map` and then merging the `tombstones` set, then doing `map` = `map` - `tombstones`.
25///
26/// The implementation of `tombstones` can be any set-like thing. This allows a user to plug in their own set-like implementation.
27/// For example, if the user knows that keys will be created and deleted strictly sequentially, then they could create a highly optimized set implementation
28/// which would just be a single integer, correpsonding to the current key value that the set is up to. Queries for keys below that integer would return true,
29/// queries for keys above it would return false.
30#[derive(Copy, Clone, Debug, Default)]
31#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
32pub struct MapUnionWithTombstones<Map, TombstoneSet> {
33    map: Map,
34    tombstones: TombstoneSet,
35}
36
37impl<Map, TombstoneSet> MapUnionWithTombstones<Map, TombstoneSet> {
38    /// Create a new `MapUnionWithTombstones` from a `Map` and a `TombstoneSet`.
39    pub fn new(map: Map, tombstones: TombstoneSet) -> Self {
40        Self { map, tombstones }
41    }
42
43    /// Create a new `MapUnionWithTombstones` from an `Into<Map>` and an `Into<TombstoneSet>`.
44    pub fn new_from(map: impl Into<Map>, tombstones: impl Into<TombstoneSet>) -> Self {
45        Self::new(map.into(), tombstones.into())
46    }
47
48    /// Reveal the inner value as a shared reference.
49    pub fn as_reveal_ref(&self) -> (&Map, &TombstoneSet) {
50        (&self.map, &self.tombstones)
51    }
52
53    /// Reveal the inner value as an exclusive reference.
54    pub fn as_reveal_mut(&mut self) -> (&mut Map, &mut TombstoneSet) {
55        (&mut self.map, &mut self.tombstones)
56    }
57
58    /// Gets the inner by value, consuming self.
59    pub fn into_reveal(self) -> (Map, TombstoneSet) {
60        (self.map, self.tombstones)
61    }
62}
63
64// Merge implementation using TombstoneSet trait for optimized union operations
65impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
66    Merge<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
67    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
68where
69    MapSelf: Keyed<Key = K, Item = ValSelf>
70        + Extend<(K, ValSelf)>
71        + for<'a> GetMut<&'a K, Item = ValSelf>
72        + for<'b> Remove<&'b K>,
73    MapOther: IntoIterator<Item = (K, ValOther)>,
74    ValSelf: Merge<ValOther> + LatticeFrom<ValOther>,
75    ValOther: IsBot,
76    TombstoneSetSelf: TombstoneSet<K>,
77    TombstoneSetOther: IntoIterator<Item = K>,
78{
79    fn merge(&mut self, other: MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> bool {
80        let mut changed = false;
81
82        // Collect other.tombstones into a vec to avoid borrowing issues
83        let other_tombstones: Vec<_> = other.tombstones.into_iter().collect();
84        let old_tombstones_len = self.tombstones.len();
85
86        // This vec collect is needed to prevent simultaneous mut references `self.0.extend` and
87        // `self.0.get_mut`.
88        // TODO(mingwei): This could be fixed with a different structure, maybe some sort of
89        // `Collection` entry API.
90        let iter: Vec<_> = other
91            .map
92            .into_iter()
93            .filter(|(k_other, val_other)| {
94                !val_other.is_bot() && !self.tombstones.contains(k_other)
95            })
96            .filter_map(|(k_other, val_other)| {
97                match self.map.get_mut(&k_other) {
98                    // Key collision, merge into `self`.
99                    Some(mut val_self) => {
100                        changed |= val_self.merge(val_other);
101                        None
102                    }
103                    // New value, convert for extending.
104                    None => {
105                        changed = true;
106                        Some((k_other, ValSelf::lattice_from(val_other)))
107                    }
108                }
109            })
110            .collect();
111        self.map.extend(iter);
112
113        // Extend tombstones and remove tombstoned keys from the map
114        self.tombstones
115            .extend(other_tombstones.into_iter().inspect(|k| {
116                self.map.remove(k);
117            }));
118
119        if old_tombstones_len != self.tombstones.len() {
120            changed = true;
121        }
122
123        changed
124    }
125}
126
127impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
128    LatticeFrom<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
129    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
130where
131    MapSelf: Keyed<Key = K, Item = ValSelf> + FromIterator<(K, ValSelf)>,
132    MapOther: IntoIterator<Item = (K, ValOther)>,
133    ValSelf: LatticeFrom<ValOther>,
134    TombstoneSetSelf: FromIterator<K>,
135    TombstoneSetOther: IntoIterator<Item = K>,
136{
137    fn lattice_from(other: MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> Self {
138        Self {
139            map: other
140                .map
141                .into_iter()
142                .map(|(k_other, val_other)| (k_other, LatticeFrom::lattice_from(val_other)))
143                .collect(),
144            tombstones: other.tombstones.into_iter().collect(),
145        }
146    }
147}
148
149impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
150    PartialOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
151    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
152where
153    MapSelf: Map<K, ValSelf, Key = K, Item = ValSelf> + MapIter + SimpleKeyedRef,
154    MapOther: Map<K, ValOther, Key = K, Item = ValOther> + MapIter + SimpleKeyedRef,
155    ValSelf: PartialOrd<ValOther> + IsBot,
156    ValOther: IsBot,
157    TombstoneSetSelf: Len + Iter<Item = K> + for<'a> Get<&'a K>,
158    TombstoneSetOther: Len + Iter<Item = K> + for<'a> Get<&'a K>,
159{
160    fn partial_cmp(
161        &self,
162        other: &MapUnionWithTombstones<MapOther, TombstoneSetOther>,
163    ) -> Option<Ordering> {
164        let self_tombstones_greater = self
165            .tombstones
166            .iter()
167            .any(|k| !other.tombstones.contains(&*k));
168
169        let other_tombstones_greater = other
170            .tombstones
171            .iter()
172            .any(|k| !self.tombstones.contains(&*k));
173
174        if self_tombstones_greater && other_tombstones_greater {
175            return None;
176        }
177
178        let mut self_any_greater = false;
179        let mut other_any_greater = false;
180        let self_keys = self
181            .map
182            .iter()
183            .filter(|(k, v)| {
184                !v.is_bot() && !self.tombstones.contains(k) && !other.tombstones.contains(k)
185            })
186            .map(|(k, _v)| <MapSelf as SimpleKeyedRef>::into_ref(k));
187        let other_keys = other
188            .map
189            .iter()
190            .filter(|(k, v)| {
191                !v.is_bot() && !self.tombstones.contains(k) && !other.tombstones.contains(k)
192            })
193            .map(|(k, _v)| <MapOther as SimpleKeyedRef>::into_ref(k));
194
195        for k in self_keys.chain(other_keys) {
196            match (self.map.get(k), other.map.get(k)) {
197                (Some(self_value), Some(other_value)) => {
198                    match self_value.partial_cmp(&*other_value)? {
199                        Less => {
200                            other_any_greater = true;
201                        }
202                        Greater => {
203                            self_any_greater = true;
204                        }
205                        Equal => {}
206                    }
207                }
208                (Some(_), None) => {
209                    self_any_greater = true;
210                }
211                (None, Some(_)) => {
212                    other_any_greater = true;
213                }
214                (None, None) => unreachable!(),
215            }
216            if self_any_greater && other_any_greater {
217                return None;
218            }
219        }
220        match (
221            self_any_greater,
222            other_any_greater,
223            self_tombstones_greater,
224            other_tombstones_greater,
225        ) {
226            (false, false, false, false) => Some(Equal),
227
228            (false, false, true, false) => Some(Greater),
229            (false, false, false, true) => Some(Less),
230
231            (true, false, false, false) => Some(Greater),
232            (false, true, false, false) => Some(Less),
233
234            (true, false, true, false) => Some(Greater),
235            (false, true, false, true) => Some(Less),
236
237            (true, false, false, true) => None,
238            (false, true, true, false) => None,
239
240            (true, true, _, _) => unreachable!(),
241            (_, _, true, true) => unreachable!(),
242        }
243    }
244}
245impl<MapSelf, MapOther, TombstoneSetSelf, TombstoneSetOther>
246    LatticeOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
247    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
248where
249    Self: PartialOrd<MapUnionWithTombstones<MapOther, TombstoneSetOther>>,
250{
251}
252
253impl<MapSelf, MapOther, K, ValSelf, ValOther, TombstoneSetSelf, TombstoneSetOther>
254    PartialEq<MapUnionWithTombstones<MapOther, TombstoneSetOther>>
255    for MapUnionWithTombstones<MapSelf, TombstoneSetSelf>
256where
257    MapSelf: Map<K, ValSelf, Key = K, Item = ValSelf> + MapIter + SimpleKeyedRef,
258    MapOther: Map<K, ValOther, Key = K, Item = ValOther> + MapIter + SimpleKeyedRef,
259    ValSelf: PartialEq<ValOther> + IsBot,
260    ValOther: IsBot,
261    TombstoneSetSelf: Len + Iter<Item = K> + for<'a> Get<&'a K>,
262    TombstoneSetOther: Len + Iter<Item = K> + for<'b> Get<&'b K>,
263{
264    fn eq(&self, other: &MapUnionWithTombstones<MapOther, TombstoneSetOther>) -> bool {
265        if self.tombstones.len() != other.tombstones.len() {
266            return false;
267        }
268
269        if self
270            .tombstones
271            .iter()
272            .any(|k| !other.tombstones.contains(&*k))
273        {
274            return false;
275        }
276
277        if other
278            .tombstones
279            .iter()
280            .any(|k| !self.tombstones.contains(&*k))
281        {
282            return false;
283        }
284
285        let self_keys = self
286            .map
287            .iter()
288            .filter(|(_k, v)| !v.is_bot())
289            .map(|(k, _v)| <MapSelf as SimpleKeyedRef>::into_ref(k));
290        let other_keys = other
291            .map
292            .iter()
293            .filter(|(_k, v)| !v.is_bot())
294            .map(|(k, _v)| <MapOther as SimpleKeyedRef>::into_ref(k));
295        for k in self_keys.chain(other_keys) {
296            match (self.map.get(k), other.map.get(k)) {
297                (Some(self_value), Some(other_value)) => {
298                    if *self_value != *other_value {
299                        return false;
300                    }
301                }
302                (None, None) => unreachable!(),
303                _ => {
304                    return false;
305                }
306            }
307        }
308
309        true
310    }
311}
312impl<MapSelf, TombstoneSetSelf> Eq for MapUnionWithTombstones<MapSelf, TombstoneSetSelf> where
313    Self: PartialEq
314{
315}
316
317impl<Map, TombstoneSet> IsBot for MapUnionWithTombstones<Map, TombstoneSet>
318where
319    Map: Iter,
320    Map::Item: IsBot,
321    TombstoneSet: Len,
322{
323    fn is_bot(&self) -> bool {
324        self.map.iter().all(|v| v.is_bot()) && self.tombstones.is_empty()
325    }
326}
327
328impl<Map, TombstoneSet> IsTop for MapUnionWithTombstones<Map, TombstoneSet> {
329    fn is_top(&self) -> bool {
330        false
331    }
332}
333
334/// [`std::collections::HashMap`]-backed [`MapUnionWithTombstones`] lattice.
335pub type MapUnionHashMapWithTombstoneHashSet<K, Val> =
336    MapUnionWithTombstones<HashMap<K, Val>, HashSet<K>>;
337
338/// [`crate::collections::SingletonMap`]-backed [`MapUnionWithTombstones`] lattice.
339pub type MapUnionWithTombstonesSingletonMapOnly<K, Val> =
340    MapUnionWithTombstones<SingletonMap<K, Val>, EmptySet<K>>;
341
342/// [`crate::collections::SingletonSet`]-backed [`MapUnionWithTombstones`] lattice.
343pub type MapUnionWithTombstonesTombstoneSingletonSetOnly<K, Val> =
344    MapUnionWithTombstones<EmptyMap<K, Val>, SingletonSet<K>>;
345
346/// [`crate::tombstone::RoaringTombstoneSet`]-backed tombstone set with [`HashMap`] for the main map.
347/// Provides space-efficient tombstone storage for u64 integer keys.
348pub type MapUnionWithTombstonesRoaring<Val> =
349    MapUnionWithTombstones<HashMap<u64, Val>, RoaringTombstoneSet>;
350
351/// FST-backed tombstone set with [`HashMap`] for the main map.
352/// Provides space-efficient, collision-free tombstone storage for String keys.
353pub type MapUnionWithTombstonesFstString<Val> =
354    MapUnionWithTombstones<HashMap<String, Val>, FstTombstoneSet<String>>;
355
356#[cfg(test)]
357mod test {
358    use super::*;
359    use crate::NaiveLatticeOrd;
360    use crate::set_union::{SetUnion, SetUnionHashSet, SetUnionSingletonSet};
361    use crate::test::check_all;
362
363    #[test]
364    fn test_map_union() {
365        type K = &'static str;
366        type V = usize;
367
368        type M = MapUnionWithTombstones<HashMap<K, SetUnionHashSet<V>>, HashSet<K>>;
369        type S = MapUnionWithTombstones<SingletonMap<K, SetUnionSingletonSet<V>>, EmptySet<K>>;
370        type T = MapUnionWithTombstones<EmptyMap<K, SetUnion<EmptySet<V>>>, SingletonSet<K>>;
371
372        let mut my_map_a = M::default();
373        let my_map_b = S::new(
374            SingletonMap("hello", SetUnion::new(SingletonSet(100))),
375            Default::default(),
376        );
377
378        let my_map_c = T::new(Default::default(), SingletonSet("hello"));
379
380        my_map_a.merge(my_map_b);
381        my_map_a.merge(my_map_c);
382
383        assert!(!my_map_a.as_reveal_ref().0.contains_key("hello"));
384    }
385
386    #[test]
387    fn contrain1() {
388        type T = MapUnionWithTombstones<HashMap<i32, SetUnion<HashSet<i32>>>, HashSet<i32>>;
389
390        let a = T::new_from([], HashSet::from_iter([0]));
391        let b = T::new_from(
392            [(0, SetUnionHashSet::new_from([0]))],
393            HashSet::from_iter([]),
394        );
395
396        assert_eq!(a.naive_cmp(&b), Some(Greater));
397        assert_eq!(a.partial_cmp(&b), Some(Greater));
398
399        let a = T::new_from([], HashSet::from_iter([1]));
400        let b = T::new_from([(0, SetUnionHashSet::new_from([0]))], HashSet::default());
401
402        assert_eq!(a.naive_cmp(&b), None);
403        assert_eq!(a.partial_cmp(&b), None);
404    }
405
406    #[test]
407    fn consistency() {
408        type K = &'static str;
409        type V = SetUnion<HashSet<i32>>;
410
411        type M = MapUnionWithTombstones<HashMap<K, V>, HashSet<K>>;
412
413        let mut test_vec = Vec::new();
414
415        #[rustfmt::skip]
416        {
417            test_vec.push(M::new_from([], HashSet::from_iter([])));
418
419            test_vec.push(M::new_from([], HashSet::from_iter(["a"])));
420            test_vec.push(M::new_from([], HashSet::from_iter(["b"])));
421            test_vec.push(M::new_from([], HashSet::from_iter(["a", "b"])));
422
423            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([]))], HashSet::from_iter([])));
424            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([0]))], HashSet::from_iter([])));
425            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([1]))], HashSet::from_iter([])));
426            test_vec.push(M::new_from([("a", SetUnionHashSet::new_from([0, 1]))], HashSet::from_iter([])));
427
428            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([]))], HashSet::from_iter([])));
429            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([0]))], HashSet::from_iter([])));
430            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([1]))], HashSet::from_iter([])));
431            test_vec.push(M::new_from([("b", SetUnionHashSet::new_from([0, 1]))], HashSet::from_iter([])));
432        };
433
434        check_all(&test_vec);
435    }
436
437    /// Check that a key with a value of bottom is the same as an empty map, etc.
438    #[test]
439    fn test_collapses_bot() {
440        type K = &'static str;
441        type V = SetUnion<HashSet<i32>>;
442
443        type A = MapUnionWithTombstones<HashMap<K, V>, HashSet<K>>;
444        type B = MapUnionWithTombstones<SingletonMap<K, V>, HashSet<K>>;
445
446        let map_empty = A::default();
447
448        let map_a_bot = B::new(SingletonMap("a", Default::default()), Default::default());
449        let map_b_bot = B::new(SingletonMap("b", Default::default()), Default::default());
450
451        assert_eq!(map_empty, map_a_bot);
452        assert_eq!(map_empty, map_b_bot);
453        assert_eq!(map_a_bot, map_b_bot);
454    }
455
456    #[test]
457    fn roaring_u64_basic() {
458        let mut x = MapUnionWithTombstonesRoaring::new_from(
459            HashMap::from([
460                (1u64, SetUnionHashSet::new_from([10])),
461                (2, SetUnionHashSet::new_from([20])),
462            ]),
463            RoaringTombstoneSet::new(),
464        );
465        let mut y = MapUnionWithTombstonesRoaring::new_from(
466            HashMap::from([
467                (2u64, SetUnionHashSet::new_from([21])),
468                (3, SetUnionHashSet::new_from([30])),
469            ]),
470            RoaringTombstoneSet::new(),
471        );
472
473        // Add tombstone for key 2
474        y.as_reveal_mut().1.insert(2);
475
476        x.merge(y);
477
478        // Should have keys 1 and 3, but not 2 (tombstoned)
479        assert!(!x.as_reveal_ref().0.contains_key(&2));
480        assert!(x.as_reveal_ref().0.contains_key(&1));
481        assert!(x.as_reveal_ref().0.contains_key(&3));
482        assert!(x.as_reveal_ref().1.contains(&2));
483    }
484
485    #[test]
486    fn fst_string_basic() {
487        let mut x = MapUnionWithTombstonesFstString::new_from(
488            HashMap::from([
489                ("apple".to_string(), SetUnionHashSet::new_from([1])),
490                ("banana".to_string(), SetUnionHashSet::new_from([2])),
491            ]),
492            FstTombstoneSet::new(),
493        );
494        let mut y = MapUnionWithTombstonesFstString::new_from(
495            HashMap::from([
496                ("banana".to_string(), SetUnionHashSet::new_from([3])),
497                ("cherry".to_string(), SetUnionHashSet::new_from([4])),
498            ]),
499            FstTombstoneSet::new(),
500        );
501
502        // Add tombstone for "banana"
503        y.as_reveal_mut().1.extend(vec!["banana".to_string()]);
504
505        x.merge(y);
506
507        // Should have "apple" and "cherry", but not "banana" (tombstoned)
508        assert!(!x.as_reveal_ref().0.contains_key("banana"));
509        assert!(x.as_reveal_ref().0.contains_key("apple"));
510        assert!(x.as_reveal_ref().0.contains_key("cherry"));
511        assert!(x.as_reveal_ref().1.contains(b"banana"));
512    }
513
514    #[test]
515    fn roaring_merge_efficiency() {
516        // Test that merging roaring bitmaps works correctly
517        let mut x = MapUnionWithTombstonesRoaring::new_from(
518            HashMap::from([
519                (1u64, SetUnionHashSet::new_from([1])),
520                (2, SetUnionHashSet::new_from([2])),
521            ]),
522            RoaringTombstoneSet::from_iter(vec![10u64, 20]),
523        );
524
525        let y = MapUnionWithTombstonesRoaring::new_from(
526            HashMap::from([(3u64, SetUnionHashSet::new_from([3]))]),
527            RoaringTombstoneSet::from_iter(vec![30u64, 2]),
528        );
529
530        x.merge(y);
531
532        // Should have all tombstones
533        assert!(x.as_reveal_ref().1.contains(&10));
534        assert!(x.as_reveal_ref().1.contains(&20));
535        assert!(x.as_reveal_ref().1.contains(&30));
536        assert!(x.as_reveal_ref().1.contains(&2));
537
538        // Should not have key 2 in the map
539        assert!(!x.as_reveal_ref().0.contains_key(&2));
540
541        // Should have keys 1 and 3
542        assert!(x.as_reveal_ref().0.contains_key(&1));
543        assert!(x.as_reveal_ref().0.contains_key(&3));
544    }
545}