Skip to main content

variadics/
variadic_collections.rs

1use core::fmt;
2use core::hash::{BuildHasher, Hash};
3
4use hashbrown::hash_table::{Entry, HashTable};
5
6#[cfg(feature = "alloc")]
7use crate::VecVariadic;
8use crate::{PartialEqVariadic, VariadicExt};
9
10/// Trait for a set of Variadic Tuples
11pub trait VariadicCollection: Extend<Self::Schema> {
12    /// The Schema (aka Variadic type) associated with tuples in this set
13    type Schema: PartialEqVariadic;
14
15    /// Insert an element into the set, return true if successful
16    fn insert(&mut self, element: Self::Schema) -> bool;
17
18    /// Iterate over the elements of the set
19    fn iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
20
21    /// Return number of elements in the set
22    fn len(&self) -> usize;
23
24    /// Return true if empty
25    fn is_empty(&self) -> bool;
26
27    /// iterate and drain items from the set without deallocating the container
28    fn drain(&mut self) -> impl Iterator<Item = Self::Schema>;
29
30    /// Check for containment
31    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool;
32}
33
34/// trait for sets or multisets of variadics
35pub trait VariadicSet: VariadicCollection {}
36
37/// Default implementation of a set.
38#[cfg(feature = "std")]
39pub type VariadicHashSetStd<T> = VariadicHashSet<T, std::hash::RandomState>;
40
41/// HashSet that stores Variadics of owned values but allows
42/// for lookups with RefVariadics as well
43#[derive(Clone)]
44pub struct VariadicHashSet<T, S> {
45    table: HashTable<T>,
46    hasher: S,
47}
48
49#[cfg(feature = "std")]
50impl<T> VariadicHashSet<T, std::hash::RandomState> {
51    /// Creates a new `VariadicHashSet` with a default hasher.
52    pub fn new() -> Self {
53        Self {
54            table: HashTable::new(),
55            hasher: std::hash::RandomState::default(),
56        }
57    }
58}
59
60impl<T, S> Default for VariadicHashSet<T, S>
61where
62    S: Default,
63{
64    fn default() -> Self {
65        Self::with_hasher(S::default())
66    }
67}
68
69#[cfg(feature = "alloc")]
70impl<T, S> fmt::Debug for VariadicHashSet<T, S>
71where
72    T: fmt::Debug + VariadicExt + PartialEqVariadic + Eq + Hash,
73    for<'a> T::AsRefVar<'a>: Hash + fmt::Debug,
74    S: BuildHasher,
75{
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        f.debug_set().entries(self.iter()).finish()
78    }
79}
80
81impl<T, S> VariadicHashSet<T, S>
82where
83    T: PartialEqVariadic,
84    for<'a> T::AsRefVar<'a>: Hash,
85    S: BuildHasher,
86{
87    /// given a RefVariadic lookup key, get a RefVariadic version of a tuple in the set
88    pub fn get<'a>(&'a self, ref_var: T::AsRefVar<'_>) -> Option<&'a T> {
89        let hash = self.hasher.hash_one(ref_var);
90        self.table.find(hash, |item| {
91            <T as PartialEqVariadic>::eq_ref(ref_var, item.as_ref_var())
92        })
93    }
94}
95impl<T, S> VariadicCollection for VariadicHashSet<T, S>
96where
97    T: VariadicExt + PartialEqVariadic + Eq + Hash,
98    for<'a> T::AsRefVar<'a>: Hash,
99    S: BuildHasher,
100{
101    type Schema = T;
102
103    fn insert(&mut self, element: T) -> bool {
104        // let hash = Self::get_hash(&self.hasher, element.as_ref_var());
105        let hash = self.hasher.hash_one(element.as_ref_var());
106        let entry = self.table.entry(
107            hash,
108            |item| <T as PartialEqVariadic>::eq(&element, item),
109            |item| self.hasher.hash_one(item.as_ref_var()),
110        );
111        match entry {
112            Entry::Occupied(_occupied_entry) => false,
113            Entry::Vacant(vacant_entry) => {
114                vacant_entry.insert(element);
115                true
116            }
117        }
118    }
119
120    fn len(&self) -> usize {
121        self.table.len()
122    }
123
124    fn is_empty(&self) -> bool {
125        self.table.len() == 0
126    }
127
128    fn drain(&mut self) -> impl Iterator<Item = Self::Schema> {
129        self.table.drain()
130    }
131
132    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool {
133        self.get(value).is_some()
134    }
135
136    fn iter(&self) -> impl Iterator<Item = T::AsRefVar<'_>> {
137        self.table.iter().map(|item| item.as_ref_var())
138    }
139}
140impl<T, S> VariadicSet for VariadicHashSet<T, S>
141where
142    T: VariadicExt + PartialEqVariadic + Eq + Hash,
143    for<'a> T::AsRefVar<'a>: Hash,
144    S: BuildHasher,
145{
146}
147
148impl<T, S> IntoIterator for VariadicHashSet<T, S>
149where
150    T: VariadicExt + PartialEqVariadic,
151{
152    type Item = T;
153    type IntoIter = hashbrown::hash_table::IntoIter<T>;
154
155    #[inline]
156    fn into_iter(self) -> Self::IntoIter {
157        self.table.into_iter()
158    }
159}
160
161impl<T, S> VariadicHashSet<T, S> {
162    /// allocate a new VariadicHashSet with a specific hasher
163    pub fn with_hasher(hasher: S) -> Self {
164        Self {
165            table: HashTable::new(),
166            hasher,
167        }
168    }
169    /// allocate a new VariadicHashSet with a specific hasher and capacity
170    pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self {
171        Self {
172            table: HashTable::with_capacity(capacity),
173            hasher,
174        }
175    }
176}
177
178// THIS CODE ADAPTED FROM hashbrown::HashMap
179impl<K, S> Extend<K> for VariadicHashSet<K, S>
180where
181    K: Eq + Hash + PartialEqVariadic,
182    S: BuildHasher,
183    for<'a> K::AsRefVar<'a>: Hash,
184{
185    fn extend<T: IntoIterator<Item = K>>(&mut self, iter: T) {
186        // Keys may be already present or show multiple times in the iterator.
187        // Reserve the entire hint lower bound if the map is empty.
188        // Otherwise reserve half the hint (rounded up), so the map
189        // will only resize twice in the worst case.
190        let iter = iter.into_iter();
191        let reserve = if self.is_empty() {
192            iter.size_hint().0
193        } else {
194            iter.size_hint().0.div_ceil(2)
195        };
196        self.table
197            .reserve(reserve, |item| self.hasher.hash_one(item));
198
199        iter.for_each(move |k| {
200            self.insert(k);
201        });
202    }
203}
204
205impl<T, S> PartialEq for VariadicHashSet<T, S>
206where
207    T: Eq + Hash + PartialEqVariadic,
208    S: BuildHasher,
209    for<'a> T::AsRefVar<'a>: Hash,
210{
211    fn eq(&self, other: &Self) -> bool {
212        if self.len() != other.len() {
213            return false;
214        }
215        self.iter().all(|key| other.get(key).is_some())
216    }
217}
218
219impl<T, S> FromIterator<T> for VariadicHashSet<T, S>
220where
221    T: Eq + Hash + PartialEqVariadic,
222    S: BuildHasher + Default,
223    for<'a> T::AsRefVar<'a>: Hash,
224    // A: Default + Allocator,
225{
226    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
227        let mut set = Self::with_hasher(Default::default());
228        set.extend(iter);
229        set
230    }
231}
232
233/// Default implementation of a counted set.
234#[cfg(feature = "std")]
235pub type VariadicCountedHashSetStd<K> = VariadicCountedHashSet<K, std::hash::RandomState>;
236
237/// Trait for a multiset of Tuples
238pub trait VariadicMultiset: VariadicCollection {}
239
240/// HashMap keyed on Variadics of (owned value, count) pairs, allows
241/// for lookups with RefVariadics.
242#[derive(Clone)]
243pub struct VariadicCountedHashSet<K, S>
244where
245    K: VariadicExt,
246{
247    table: HashTable<(K, usize)>,
248    hasher: S,
249    len: usize,
250}
251
252#[cfg(feature = "std")]
253impl<K> VariadicCountedHashSet<K, std::hash::RandomState>
254where
255    K: VariadicExt,
256{
257    /// Creates a new `VariadicCountedHashSet` with a default hasher.
258    pub fn new() -> Self {
259        Self {
260            table: HashTable::new(),
261            hasher: std::hash::RandomState::default(),
262            len: 0,
263        }
264    }
265}
266
267impl<K, S> Default for VariadicCountedHashSet<K, S>
268where
269    K: VariadicExt,
270    S: Default,
271{
272    fn default() -> Self {
273        Self::with_hasher(S::default())
274    }
275}
276
277#[cfg(feature = "alloc")]
278impl<K, S> fmt::Debug for VariadicCountedHashSet<K, S>
279where
280    K: fmt::Debug + VariadicExt + PartialEqVariadic,
281    for<'a> K::AsRefVar<'a>: Hash + fmt::Debug,
282{
283    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
284        f.debug_set().entries(self.table.iter()).finish()
285    }
286}
287
288impl<K, S> VariadicCountedHashSet<K, S>
289where
290    K: PartialEqVariadic,
291    for<'a> K::AsRefVar<'a>: Hash,
292    S: BuildHasher,
293{
294    /// given a RefVariadic lookup key, get a RefVariadic version of an entry in the map
295    pub fn get<'a>(&'a self, ref_var: K::AsRefVar<'_>) -> Option<&'a (K, usize)> {
296        let hash = self.hasher.hash_one(ref_var);
297        self.table.find(hash, |(key, _val)| {
298            <K as PartialEqVariadic>::eq_ref(ref_var, key.as_ref_var())
299        })
300    }
301}
302
303impl<K, S> VariadicCollection for VariadicCountedHashSet<K, S>
304where
305    K: VariadicExt + PartialEqVariadic + Eq + Hash + Clone,
306    for<'a> K::AsRefVar<'a>: Hash,
307    S: BuildHasher,
308{
309    type Schema = K;
310
311    fn insert(&mut self, element: K) -> bool {
312        let hash = self.hasher.hash_one(element.as_ref_var());
313        self.table
314            .entry(
315                hash,
316                |(item, _count)| <K as PartialEqVariadic>::eq(&element, item),
317                |(item, _count)| self.hasher.hash_one(item.as_ref_var()),
318            )
319            .and_modify(|(_, count)| *count += 1)
320            .or_insert((element, 1));
321        self.len += 1;
322        true
323    }
324
325    fn len(&self) -> usize {
326        self.len
327    }
328
329    fn is_empty(&self) -> bool {
330        self.len() == 0
331    }
332
333    fn drain(&mut self) -> impl Iterator<Item = Self::Schema> {
334        // TODO: this shouldn't clone the last copy of each k!
335        // particularly bad when there's typically only 1 copy per item
336        self.len = 0;
337        self.table
338            .drain()
339            .flat_map(|(k, num)| (0..num).map(move |_i| k.clone()))
340    }
341
342    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool {
343        self.get(value).is_some()
344    }
345
346    fn iter(&self) -> impl Iterator<Item = K::AsRefVar<'_>> {
347        self.table
348            .iter()
349            .flat_map(|(k, num)| (0..*num).map(move |_i| k.as_ref_var()))
350    }
351}
352
353impl<K, S> VariadicMultiset for VariadicCountedHashSet<K, S>
354where
355    K: VariadicExt + PartialEqVariadic + Eq + Hash + Clone,
356    for<'a> K::AsRefVar<'a>: Hash,
357    S: BuildHasher,
358{
359}
360
361impl<T, S> IntoIterator for VariadicCountedHashSet<T, S>
362where
363    T: VariadicExt + PartialEqVariadic + Clone,
364{
365    type Item = T;
366    type IntoIter = DuplicateCounted<hashbrown::hash_table::IntoIter<(T, usize)>, T>;
367
368    #[inline]
369    fn into_iter(self) -> Self::IntoIter {
370        DuplicateCounted {
371            iter: self.table.into_iter(),
372            state: None,
373        }
374    }
375}
376
377/// Iterator helper for [`VariadicCountedHashSet::into_iter`].
378#[derive(Clone)]
379pub struct DuplicateCounted<Iter, Item> {
380    iter: Iter,
381    state: Option<(Item, usize)>,
382}
383impl<Iter, Item> Iterator for DuplicateCounted<Iter, Item>
384where
385    Iter: Iterator<Item = (Item, usize)>,
386    Item: Clone,
387{
388    type Item = Item;
389
390    fn next(&mut self) -> Option<Self::Item> {
391        loop {
392            match self.state.take() {
393                Some((item, 1)) => {
394                    self.state = None;
395                    return Some(item);
396                }
397                None | Some((_, 0)) => match self.iter.next() {
398                    Some(state) => self.state = Some(state),
399                    None => return None,
400                },
401                Some((item, many)) => {
402                    let out = Some(item.clone());
403                    self.state = Some((item, many - 1));
404                    return out;
405                }
406            }
407        }
408    }
409}
410
411impl<K, S> VariadicCountedHashSet<K, S>
412where
413    K: VariadicExt,
414{
415    /// allocate a new VariadicCountedHashSet with a specific hasher
416    pub fn with_hasher(hasher: S) -> Self {
417        Self {
418            table: HashTable::new(),
419            hasher,
420            len: 0,
421        }
422    }
423    /// allocate a new VariadicCountedHashSet with a specific hasher and capacity
424    pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self {
425        Self {
426            table: HashTable::with_capacity(capacity),
427            hasher,
428            len: 0,
429        }
430    }
431}
432
433// THIS CODE ADAPTED FROM hashbrown::HashTable
434impl<K, S> Extend<K> for VariadicCountedHashSet<K, S>
435where
436    K: Eq + Hash + PartialEqVariadic + Clone,
437    S: BuildHasher,
438    for<'a> K::AsRefVar<'a>: Hash,
439{
440    fn extend<T: IntoIterator<Item = K>>(&mut self, iter: T) {
441        // Keys may be already present or show multiple times in the iterator.
442        // Reserve the entire hint lower bound if the map is empty.
443        // Otherwise reserve half the hint (rounded up), so the map
444        // will only resize twice in the worst case.
445        let iter = iter.into_iter();
446        let reserve = if self.is_empty() {
447            iter.size_hint().0
448        } else {
449            iter.size_hint().0.div_ceil(2)
450        };
451        self.table
452            .reserve(reserve, |item| self.hasher.hash_one(item));
453        iter.for_each(move |key| {
454            // TODO: super inefficient. Need a insert_with_count method
455            self.insert(key);
456        });
457    }
458}
459
460impl<T, S> PartialEq for VariadicCountedHashSet<T, S>
461where
462    T: Eq + Hash + PartialEqVariadic + Clone,
463    S: BuildHasher,
464    for<'a> T::AsRefVar<'a>: Hash,
465{
466    fn eq(&self, other: &Self) -> bool {
467        if self.len() != other.len() {
468            return false;
469        }
470
471        // let v: Vec<&(T, usize)> =
472        self.table.iter().all(|(key, count)| {
473            if let Some((_, match_val)) = other.get(key.as_ref_var()) {
474                match_val == count
475            } else {
476                false
477            }
478        })
479    }
480}
481
482impl<T, S> FromIterator<T> for VariadicCountedHashSet<T, S>
483where
484    T: Eq + Hash + PartialEqVariadic + Clone,
485    S: BuildHasher + Default,
486    for<'a> T::AsRefVar<'a>: Hash,
487{
488    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
489        let mut set = Self::with_hasher(Default::default());
490        set.extend(iter);
491        set
492    }
493}
494
495/// Column storage for Variadic tuples of type Schema
496/// An alternative to VariadicHashMultiset
497#[cfg(feature = "alloc")]
498#[derive(Clone)]
499pub struct VariadicColumnMultiset<Schema>
500where
501    Schema: VariadicExt + Eq + Hash,
502{
503    columns: Schema::IntoVec,
504    last_offset: usize,
505}
506
507#[cfg(feature = "alloc")]
508impl<T> VariadicColumnMultiset<T>
509where
510    T: VariadicExt + Eq + Hash,
511{
512    /// initialize an empty columnar multiset
513    pub fn new() -> Self {
514        Self {
515            columns: <T::IntoVec as Default>::default(),
516            last_offset: 0,
517        }
518    }
519}
520
521#[cfg(feature = "alloc")]
522impl<T> Default for VariadicColumnMultiset<T>
523where
524    T: VariadicExt + Eq + Hash,
525{
526    fn default() -> Self {
527        Self::new()
528    }
529}
530
531#[cfg(feature = "alloc")]
532impl<Schema> VariadicCollection for VariadicColumnMultiset<Schema>
533where
534    Schema: PartialEqVariadic + Eq + Hash,
535    for<'a> <Schema as VariadicExt>::AsRefVar<'a>: Hash,
536{
537    type Schema = Schema;
538
539    fn insert(&mut self, element: Schema) -> bool {
540        if self.last_offset == 0 {
541            self.columns = element.into_singleton_vec()
542        } else {
543            self.columns.push(element);
544        }
545        self.last_offset += 1;
546        true
547    }
548
549    fn iter(&self) -> impl Iterator<Item = <Schema as VariadicExt>::AsRefVar<'_>> {
550        self.columns.zip_vecs()
551    }
552
553    fn len(&self) -> usize {
554        self.last_offset
555    }
556
557    fn is_empty(&self) -> bool {
558        self.len() == 0
559    }
560
561    fn drain(&mut self) -> impl Iterator<Item = Self::Schema> {
562        self.last_offset = 0;
563        self.columns.drain(0..)
564    }
565
566    fn contains(&self, value: <Self::Schema as VariadicExt>::AsRefVar<'_>) -> bool {
567        self.iter()
568            .any(|t| <Schema as PartialEqVariadic>::eq_ref(t, value))
569    }
570}
571
572#[cfg(feature = "alloc")]
573impl<Schema> VariadicMultiset for VariadicColumnMultiset<Schema>
574where
575    Schema: PartialEqVariadic + Eq + Hash,
576    for<'a> <Schema as VariadicExt>::AsRefVar<'a>: Hash,
577{
578}
579
580#[cfg(feature = "alloc")]
581impl<T> fmt::Debug for VariadicColumnMultiset<T>
582where
583    T: fmt::Debug + VariadicExt + PartialEqVariadic + Eq + Hash,
584    for<'a> T::AsRefVar<'a>: Hash + fmt::Debug,
585{
586    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
587        f.debug_set().entries(self.iter()).finish()
588    }
589}
590
591#[cfg(feature = "alloc")]
592impl<Schema> IntoIterator for VariadicColumnMultiset<Schema>
593where
594    Schema: PartialEqVariadic + Eq + Hash,
595{
596    type Item = Schema;
597    type IntoIter = <Schema::IntoVec as VecVariadic>::IntoZip;
598
599    #[inline]
600    fn into_iter(self) -> Self::IntoIter {
601        self.columns.into_zip()
602    }
603}
604
605#[cfg(feature = "alloc")]
606impl<K> Extend<K> for VariadicColumnMultiset<K>
607where
608    K: Eq + Hash + PartialEqVariadic,
609    for<'a> K::AsRefVar<'a>: Hash,
610{
611    fn extend<T: IntoIterator<Item = K>>(&mut self, iter: T) {
612        let iter = iter.into_iter();
613        iter.for_each(move |k| {
614            self.insert(k);
615        });
616    }
617}
618
619#[cfg(feature = "std")]
620#[cfg(test)]
621mod test {
622    use super::*;
623    use crate::{var_expr, var_type};
624
625    type TestSchema = var_type!(i16, i32, i64, &'static str);
626
627    #[test]
628    fn test_collections() {
629        use std::vec;
630        use std::vec::Vec;
631
632        let test_data: Vec<TestSchema> = vec![
633            var_expr!(1, 1, 1, "hello"),
634            var_expr!(1, 1, 1, "hello"),
635            var_expr!(1, 1, 1, "world"),
636            var_expr!(1, 1, 2, "world"),
637        ];
638
639        let mut hash_set: VariadicHashSet<TestSchema, _> = VariadicHashSet::new();
640        hash_set.extend(test_data.clone());
641        let mut multi_set: VariadicCountedHashSet<TestSchema, _> = VariadicCountedHashSet::new();
642        let hash = multi_set
643            .hasher
644            .hash_one(var_expr!(1, 1, 1, "world").as_ref_var());
645        let hash2 = multi_set
646            .hasher
647            .hash_one(var_expr!(1, 1, 1, "world").as_ref_var());
648        assert_eq!(hash, hash2);
649        multi_set.extend(test_data.clone());
650        let mut columnar: VariadicColumnMultiset<TestSchema> = Default::default();
651        columnar.extend(test_data.clone());
652
653        assert_eq!(multi_set.len(), 4);
654        assert_eq!(columnar.len(), 4);
655        assert_eq!(hash_set.len(), 3);
656
657        hash_set.insert(var_expr!(1, 1, 1, "hello"));
658        hash_set.insert(var_expr!(2, 1, 1, "dup"));
659        hash_set.insert(var_expr!(2, 1, 1, "dup"));
660        multi_set.insert(var_expr!(1, 1, 1, "hello"));
661        multi_set.insert(var_expr!(2, 1, 1, "dup"));
662        multi_set.insert(var_expr!(2, 1, 1, "dup"));
663        columnar.insert(var_expr!(1, 1, 1, "hello"));
664        columnar.insert(var_expr!(2, 1, 1, "dup"));
665        columnar.insert(var_expr!(2, 1, 1, "dup"));
666
667        assert_eq!(multi_set.len(), 7);
668        assert_eq!(columnar.len(), 7);
669        assert_eq!(hash_set.len(), 4);
670
671        assert!(test_data.iter().all(|t| hash_set.contains(t.as_ref_var())));
672        assert!(test_data.iter().all(|t| multi_set.contains(t.as_ref_var())));
673        assert!(test_data.iter().all(|t| columnar.contains(t.as_ref_var())));
674
675        let _hs = hash_set.drain().collect::<Vec<_>>();
676        let _ms = multi_set.drain().collect::<Vec<_>>();
677        let _c = columnar.drain().collect::<Vec<_>>();
678        assert_eq!(hash_set.len(), 0);
679        assert_eq!(multi_set.len(), 0);
680        assert_eq!(columnar.len(), 0);
681    }
682}