variadics/
variadic_collections.rs

1use std::fmt;
2use std::hash::{BuildHasher, Hash, RandomState};
3
4use hashbrown::hash_table::{Entry, HashTable};
5
6use crate::{PartialEqVariadic, VariadicExt, VecVariadic};
7
8/// Trait for a set of Variadic Tuples
9pub trait VariadicCollection: Extend<Self::Schema> {
10    /// The Schema (aka Variadic type) associated with tuples in this set
11    type Schema: PartialEqVariadic;
12
13    /// Insert an element into the set, return true if successful
14    fn insert(&mut self, element: Self::Schema) -> bool;
15
16    /// Iterate over the elements of the set
17    fn iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
18
19    /// Return number of elements in the set
20    fn len(&self) -> usize;
21
22    /// Return true if empty
23    fn is_empty(&self) -> bool;
24
25    /// iterate and drain items from the set without deallocating the container
26    fn drain(&mut self) -> impl Iterator<Item = Self::Schema>;
27
28    /// Check for containment
29    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool;
30}
31
32/// trait for sets or multisets of variadics
33pub trait VariadicSet: VariadicCollection {}
34
35/// HashSet that stores Variadics of owned values but allows
36/// for lookups with RefVariadics as well
37#[derive(Clone)]
38pub struct VariadicHashSet<T, S = RandomState> {
39    table: HashTable<T>,
40    hasher: S,
41}
42
43impl<T> VariadicHashSet<T> {
44    /// Creates a new `VariadicHashSet` with a default hasher.
45    pub fn new() -> Self {
46        Self {
47            table: HashTable::new(),
48            hasher: RandomState::default(),
49        }
50    }
51}
52
53impl<T> Default for VariadicHashSet<T> {
54    fn default() -> Self {
55        Self::new()
56    }
57}
58
59impl<T> fmt::Debug for VariadicHashSet<T>
60where
61    T: fmt::Debug + VariadicExt + PartialEqVariadic + Eq + Hash,
62    for<'a> T::AsRefVar<'a>: Hash + fmt::Debug,
63{
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        f.debug_set().entries(self.iter()).finish()
66    }
67}
68
69impl<T, S> VariadicHashSet<T, S>
70where
71    T: PartialEqVariadic,
72    for<'a> T::AsRefVar<'a>: Hash,
73    S: BuildHasher,
74{
75    /// given a RefVariadic lookup key, get a RefVariadic version of a tuple in the set
76    pub fn get<'a>(&'a self, ref_var: T::AsRefVar<'_>) -> Option<&'a T> {
77        let hash = self.hasher.hash_one(ref_var);
78        self.table.find(hash, |item| {
79            <T as PartialEqVariadic>::eq_ref(ref_var, item.as_ref_var())
80        })
81    }
82}
83impl<T, S> VariadicCollection for VariadicHashSet<T, S>
84where
85    T: VariadicExt + PartialEqVariadic + Eq + Hash,
86    for<'a> T::AsRefVar<'a>: Hash,
87    S: BuildHasher,
88{
89    type Schema = T;
90
91    fn insert(&mut self, element: T) -> bool {
92        // let hash = Self::get_hash(&self.hasher, element.as_ref_var());
93        let hash = self.hasher.hash_one(element.as_ref_var());
94        let entry = self.table.entry(
95            hash,
96            |item| <T as PartialEqVariadic>::eq(&element, item),
97            |item| self.hasher.hash_one(item.as_ref_var()),
98        );
99        match entry {
100            Entry::Occupied(_occupied_entry) => false,
101            Entry::Vacant(vacant_entry) => {
102                vacant_entry.insert(element);
103                true
104            }
105        }
106    }
107
108    fn len(&self) -> usize {
109        self.table.len()
110    }
111
112    fn is_empty(&self) -> bool {
113        self.table.len() == 0
114    }
115
116    fn drain(&mut self) -> impl Iterator<Item = Self::Schema> {
117        self.table.drain()
118    }
119
120    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool {
121        self.get(value).is_some()
122    }
123
124    fn iter(&self) -> impl Iterator<Item = T::AsRefVar<'_>> {
125        self.table.iter().map(|item| item.as_ref_var())
126    }
127}
128impl<T, S> VariadicSet for VariadicHashSet<T, S>
129where
130    T: VariadicExt + PartialEqVariadic + Eq + Hash,
131    for<'a> T::AsRefVar<'a>: Hash,
132    S: BuildHasher,
133{
134}
135
136impl<T, S> IntoIterator for VariadicHashSet<T, S>
137where
138    T: VariadicExt + PartialEqVariadic,
139{
140    type Item = T;
141    type IntoIter = hashbrown::hash_table::IntoIter<T>;
142
143    #[inline]
144    fn into_iter(self) -> Self::IntoIter {
145        self.table.into_iter()
146    }
147}
148
149impl<T, S> VariadicHashSet<T, S> {
150    /// allocate a new VariadicHashSet with a specific hasher
151    pub fn with_hasher(hasher: S) -> Self {
152        Self {
153            table: HashTable::new(),
154            hasher,
155        }
156    }
157    /// allocate a new VariadicHashSet with a specific hasher and capacity
158    pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self {
159        Self {
160            table: HashTable::with_capacity(capacity),
161            hasher,
162        }
163    }
164}
165
166// THIS CODE ADAPTED FROM hashbrown::HashMap
167impl<K, S> Extend<K> for VariadicHashSet<K, S>
168where
169    K: Eq + Hash + PartialEqVariadic,
170    S: BuildHasher,
171    for<'a> K::AsRefVar<'a>: Hash,
172{
173    fn extend<T: IntoIterator<Item = K>>(&mut self, iter: T) {
174        // Keys may be already present or show multiple times in the iterator.
175        // Reserve the entire hint lower bound if the map is empty.
176        // Otherwise reserve half the hint (rounded up), so the map
177        // will only resize twice in the worst case.
178        let iter = iter.into_iter();
179        let reserve = if self.is_empty() {
180            iter.size_hint().0
181        } else {
182            iter.size_hint().0.div_ceil(2)
183        };
184        self.table
185            .reserve(reserve, |item| self.hasher.hash_one(item));
186
187        iter.for_each(move |k| {
188            self.insert(k);
189        });
190    }
191}
192
193impl<T, S> PartialEq for VariadicHashSet<T, S>
194where
195    T: Eq + Hash + PartialEqVariadic,
196    S: BuildHasher,
197    for<'a> T::AsRefVar<'a>: Hash,
198{
199    fn eq(&self, other: &Self) -> bool {
200        if self.len() != other.len() {
201            return false;
202        }
203        self.iter().all(|key| other.get(key).is_some())
204    }
205}
206
207impl<T, S> FromIterator<T> for VariadicHashSet<T, S>
208where
209    T: Eq + Hash + PartialEqVariadic,
210    S: BuildHasher + Default,
211    for<'a> T::AsRefVar<'a>: Hash,
212    // A: Default + Allocator,
213{
214    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
215        let mut set = Self::with_hasher(Default::default());
216        set.extend(iter);
217        set
218    }
219}
220
221/// Trait for a multiset of Tuples
222pub trait VariadicMultiset: VariadicCollection {}
223
224/// HashMap keyed on Variadics of (owned value, count) pairs, allows
225/// for lookups with RefVariadics.
226#[derive(Clone)]
227pub struct VariadicCountedHashSet<K, S = RandomState>
228where
229    K: VariadicExt,
230{
231    table: HashTable<(K, usize)>,
232    hasher: S,
233    len: usize,
234}
235
236impl<K> VariadicCountedHashSet<K>
237where
238    K: VariadicExt,
239{
240    /// Creates a new `VariadicCountedHashSet` with a default hasher.
241    pub fn new() -> Self {
242        Self {
243            table: HashTable::new(),
244            hasher: RandomState::default(),
245            len: 0,
246        }
247    }
248}
249
250impl<K> Default for VariadicCountedHashSet<K>
251where
252    K: VariadicExt,
253{
254    fn default() -> Self {
255        Self::new()
256    }
257}
258
259impl<K> fmt::Debug for VariadicCountedHashSet<K>
260where
261    K: fmt::Debug + VariadicExt + PartialEqVariadic,
262    for<'a> K::AsRefVar<'a>: Hash + fmt::Debug,
263{
264    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
265        f.debug_set().entries(self.table.iter()).finish()
266    }
267}
268
269impl<K, S> VariadicCountedHashSet<K, S>
270where
271    K: PartialEqVariadic,
272    for<'a> K::AsRefVar<'a>: Hash,
273    S: BuildHasher,
274{
275    /// given a RefVariadic lookup key, get a RefVariadic version of an entry in the map
276    pub fn get<'a>(&'a self, ref_var: K::AsRefVar<'_>) -> Option<&'a (K, usize)> {
277        let hash = self.hasher.hash_one(ref_var);
278        self.table.find(hash, |(key, _val)| {
279            <K as PartialEqVariadic>::eq_ref(ref_var, key.as_ref_var())
280        })
281    }
282}
283
284impl<K, S> VariadicCollection for VariadicCountedHashSet<K, S>
285where
286    K: VariadicExt + PartialEqVariadic + Eq + Hash + Clone,
287    for<'a> K::AsRefVar<'a>: Hash,
288    S: BuildHasher,
289{
290    type Schema = K;
291
292    fn insert(&mut self, element: K) -> bool {
293        let hash = self.hasher.hash_one(element.as_ref_var());
294        self.table
295            .entry(
296                hash,
297                |(item, _count)| <K as PartialEqVariadic>::eq(&element, item),
298                |(item, _count)| self.hasher.hash_one(item.as_ref_var()),
299            )
300            .and_modify(|(_, count)| *count += 1)
301            .or_insert((element, 1));
302        self.len += 1;
303        true
304    }
305
306    fn len(&self) -> usize {
307        self.len
308    }
309
310    fn is_empty(&self) -> bool {
311        self.len() == 0
312    }
313
314    fn drain(&mut self) -> impl Iterator<Item = Self::Schema> {
315        // TODO: this shouldn't clone the last copy of each k!
316        // particularly bad when there's typically only 1 copy per item
317        self.len = 0;
318        self.table
319            .drain()
320            .flat_map(|(k, num)| (0..num).map(move |_i| k.clone()))
321    }
322
323    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool {
324        self.get(value).is_some()
325    }
326
327    fn iter(&self) -> impl Iterator<Item = K::AsRefVar<'_>> {
328        self.table
329            .iter()
330            .flat_map(|(k, num)| (0..*num).map(move |_i| k.as_ref_var()))
331    }
332}
333
334impl<K, S> VariadicMultiset for VariadicCountedHashSet<K, S>
335where
336    K: VariadicExt + PartialEqVariadic + Eq + Hash + Clone,
337    for<'a> K::AsRefVar<'a>: Hash,
338    S: BuildHasher,
339{
340}
341
342impl<T> IntoIterator for VariadicCountedHashSet<T>
343where
344    T: VariadicExt + PartialEqVariadic + Clone,
345{
346    type Item = T;
347    type IntoIter = DuplicateCounted<hashbrown::hash_table::IntoIter<(T, usize)>, T>;
348
349    #[inline]
350    fn into_iter(self) -> Self::IntoIter {
351        DuplicateCounted {
352            iter: self.table.into_iter(),
353            state: None,
354        }
355    }
356}
357
358/// Iterator helper for [`VariadicCountedHashSet::into_iter`].
359#[derive(Clone)]
360pub struct DuplicateCounted<Iter, Item> {
361    iter: Iter,
362    state: Option<(Item, usize)>,
363}
364impl<Iter, Item> Iterator for DuplicateCounted<Iter, Item>
365where
366    Iter: Iterator<Item = (Item, usize)>,
367    Item: Clone,
368{
369    type Item = Item;
370
371    fn next(&mut self) -> Option<Self::Item> {
372        loop {
373            match self.state.take() {
374                Some((item, 1)) => {
375                    self.state = None;
376                    return Some(item);
377                }
378                None | Some((_, 0)) => match self.iter.next() {
379                    Some(state) => self.state = Some(state),
380                    None => return None,
381                },
382                Some((item, many)) => {
383                    let out = Some(item.clone());
384                    self.state = Some((item, many - 1));
385                    return out;
386                }
387            }
388        }
389    }
390}
391
392impl<K, S> VariadicCountedHashSet<K, S>
393where
394    K: VariadicExt,
395{
396    /// allocate a new VariadicCountedHashSet with a specific hasher
397    pub fn with_hasher(hasher: S) -> Self {
398        Self {
399            table: HashTable::new(),
400            hasher,
401            len: 0,
402        }
403    }
404    /// allocate a new VariadicCountedHashSet with a specific hasher and capacity
405    pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self {
406        Self {
407            table: HashTable::with_capacity(capacity),
408            hasher,
409            len: 0,
410        }
411    }
412}
413
414// THIS CODE ADAPTED FROM hashbrown::HashTable
415impl<K, S> Extend<K> for VariadicCountedHashSet<K, S>
416where
417    K: Eq + Hash + PartialEqVariadic + Clone,
418    S: BuildHasher,
419    for<'a> K::AsRefVar<'a>: Hash,
420{
421    fn extend<T: IntoIterator<Item = K>>(&mut self, iter: T) {
422        // Keys may be already present or show multiple times in the iterator.
423        // Reserve the entire hint lower bound if the map is empty.
424        // Otherwise reserve half the hint (rounded up), so the map
425        // will only resize twice in the worst case.
426        let iter = iter.into_iter();
427        let reserve = if self.is_empty() {
428            iter.size_hint().0
429        } else {
430            iter.size_hint().0.div_ceil(2)
431        };
432        self.table
433            .reserve(reserve, |item| self.hasher.hash_one(item));
434        iter.for_each(move |key| {
435            // TODO: super inefficient. Need a insert_with_count method
436            self.insert(key);
437        });
438    }
439}
440
441impl<T, S> PartialEq for VariadicCountedHashSet<T, S>
442where
443    T: Eq + Hash + PartialEqVariadic + Clone,
444    S: BuildHasher,
445    for<'a> T::AsRefVar<'a>: Hash,
446{
447    fn eq(&self, other: &Self) -> bool {
448        if self.len() != other.len() {
449            return false;
450        }
451
452        // let v: Vec<&(T, usize)> =
453        self.table.iter().all(|(key, count)| {
454            if let Some((_, match_val)) = other.get(key.as_ref_var()) {
455                match_val == count
456            } else {
457                false
458            }
459        })
460    }
461}
462
463impl<T, S> FromIterator<T> for VariadicCountedHashSet<T, S>
464where
465    T: Eq + Hash + PartialEqVariadic + Clone,
466    S: BuildHasher + Default,
467    for<'a> T::AsRefVar<'a>: Hash,
468{
469    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
470        let mut set = Self::with_hasher(Default::default());
471        set.extend(iter);
472        set
473    }
474}
475
476/// Column storage for Variadic tuples of type Schema
477/// An alternative to VariadicHashMultiset
478#[derive(Clone)]
479pub struct VariadicColumnMultiset<Schema>
480where
481    Schema: VariadicExt + Eq + Hash,
482{
483    columns: Schema::IntoVec,
484    last_offset: usize,
485}
486
487impl<T> VariadicColumnMultiset<T>
488where
489    T: VariadicExt + Eq + Hash,
490{
491    /// initialize an empty columnar multiset
492    pub fn new() -> Self {
493        Self {
494            columns: <T::IntoVec as Default>::default(),
495            last_offset: 0,
496        }
497    }
498}
499
500impl<T> Default for VariadicColumnMultiset<T>
501where
502    T: VariadicExt + Eq + Hash,
503{
504    fn default() -> Self {
505        Self::new()
506    }
507}
508
509impl<Schema> VariadicCollection for VariadicColumnMultiset<Schema>
510where
511    Schema: PartialEqVariadic + Eq + Hash,
512    for<'a> <Schema as VariadicExt>::AsRefVar<'a>: Hash,
513{
514    type Schema = Schema;
515
516    fn insert(&mut self, element: Schema) -> bool {
517        if self.last_offset == 0 {
518            self.columns = element.into_singleton_vec()
519        } else {
520            self.columns.push(element);
521        }
522        self.last_offset += 1;
523        true
524    }
525
526    fn iter(&self) -> impl Iterator<Item = <Schema as VariadicExt>::AsRefVar<'_>> {
527        self.columns.zip_vecs()
528    }
529
530    fn len(&self) -> usize {
531        self.last_offset
532    }
533
534    fn is_empty(&self) -> bool {
535        self.len() == 0
536    }
537
538    fn drain(&mut self) -> impl Iterator<Item = Self::Schema> {
539        self.last_offset = 0;
540        self.columns.drain(0..)
541    }
542
543    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool {
544        self.iter()
545            .any(|t| <Schema as PartialEqVariadic>::eq_ref(t, value))
546    }
547}
548
549impl<Schema> VariadicMultiset for VariadicColumnMultiset<Schema>
550where
551    Schema: PartialEqVariadic + Eq + Hash,
552    for<'a> <Schema as VariadicExt>::AsRefVar<'a>: Hash,
553{
554}
555
556impl<T> fmt::Debug for VariadicColumnMultiset<T>
557where
558    T: fmt::Debug + VariadicExt + PartialEqVariadic + Eq + Hash,
559    for<'a> T::AsRefVar<'a>: Hash + fmt::Debug,
560{
561    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
562        f.debug_set().entries(self.iter()).finish()
563    }
564}
565
566impl<Schema> IntoIterator for VariadicColumnMultiset<Schema>
567where
568    Schema: PartialEqVariadic + Eq + Hash,
569{
570    type Item = Schema;
571    type IntoIter = <Schema::IntoVec as VecVariadic>::IntoZip;
572
573    #[inline]
574    fn into_iter(self) -> Self::IntoIter {
575        self.columns.into_zip()
576    }
577}
578
579impl<K> Extend<K> for VariadicColumnMultiset<K>
580where
581    K: Eq + Hash + PartialEqVariadic,
582    for<'a> K::AsRefVar<'a>: Hash,
583{
584    fn extend<T: IntoIterator<Item = K>>(&mut self, iter: T) {
585        let iter = iter.into_iter();
586        iter.for_each(move |k| {
587            self.insert(k);
588        });
589    }
590}
591
592#[cfg(test)]
593mod test {
594    use super::*;
595    use crate::{var_expr, var_type};
596
597    type TestSchema = var_type!(i16, i32, i64, &'static str);
598
599    #[test]
600    fn test_collections() {
601        let test_data: Vec<TestSchema> = vec![
602            var_expr!(1, 1, 1, "hello"),
603            var_expr!(1, 1, 1, "hello"),
604            var_expr!(1, 1, 1, "world"),
605            var_expr!(1, 1, 2, "world"),
606        ];
607
608        let mut hash_set: VariadicHashSet<TestSchema> = Default::default();
609        hash_set.extend(test_data.clone());
610        let mut multi_set: VariadicCountedHashSet<TestSchema> = Default::default();
611        let hash = multi_set
612            .hasher
613            .hash_one(var_expr!(1, 1, 1, "world").as_ref_var());
614        let hash2 = multi_set
615            .hasher
616            .hash_one(var_expr!(1, 1, 1, "world").as_ref_var());
617        assert_eq!(hash, hash2);
618        multi_set.extend(test_data.clone());
619        let mut columnar: VariadicColumnMultiset<TestSchema> = Default::default();
620        columnar.extend(test_data.clone());
621
622        assert_eq!(multi_set.len(), 4);
623        assert_eq!(columnar.len(), 4);
624        assert_eq!(hash_set.len(), 3);
625
626        hash_set.insert(var_expr!(1, 1, 1, "hello"));
627        hash_set.insert(var_expr!(2, 1, 1, "dup"));
628        hash_set.insert(var_expr!(2, 1, 1, "dup"));
629        multi_set.insert(var_expr!(1, 1, 1, "hello"));
630        multi_set.insert(var_expr!(2, 1, 1, "dup"));
631        multi_set.insert(var_expr!(2, 1, 1, "dup"));
632        columnar.insert(var_expr!(1, 1, 1, "hello"));
633        columnar.insert(var_expr!(2, 1, 1, "dup"));
634        columnar.insert(var_expr!(2, 1, 1, "dup"));
635
636        assert_eq!(multi_set.len(), 7);
637        assert_eq!(columnar.len(), 7);
638        assert_eq!(hash_set.len(), 4);
639
640        assert!(test_data.iter().all(|t| hash_set.contains(t.as_ref_var())));
641        assert!(test_data.iter().all(|t| multi_set.contains(t.as_ref_var())));
642        assert!(test_data.iter().all(|t| columnar.contains(t.as_ref_var())));
643
644        let _hs = hash_set.drain().collect::<Vec<_>>();
645        let _ms = multi_set.drain().collect::<Vec<_>>();
646        let _c = columnar.drain().collect::<Vec<_>>();
647        assert_eq!(hash_set.len(), 0);
648        assert_eq!(multi_set.len(), 0);
649        assert_eq!(columnar.len(), 0);
650    }
651}