1use std::collections::VecDeque;
2use std::collections::hash_map::Entry;
34use super::HalfJoinState;
5use crate::util::clear::Clear;
67type HashMap<K, V> = rustc_hash::FxHashMap<K, V>;
89use smallvec::{SmallVec, smallvec};
#[derive(Debug)]
pub struct HalfMultisetJoinState<Key, ValBuild, ValProbe> {
    // A smallvec with inline storage of 1 is chosen here.
    // Rationale (speculative): joins likely have a bimodal distribution of key
    // contention. Many joins have exactly 1 value per key on the LHS/RHS, while
    // another large category has multiple values per key — and for that second
    // category there is no principled inline size (2, 3, 4, or N are all
    // arbitrary). Inline storage of 1 benefits the single-value group without
    // hurting the multi-value group too much with excessive memory usage.
    /// Table to probe, vec val contains all matches.
    table: HashMap<Key, SmallVec<[ValBuild; 1]>>,
    /// Not-yet emitted matches.
    current_matches: VecDeque<(Key, ValProbe, ValBuild)>,
    // Total number of build values stored across all keys of `table`.
    len: usize,
}
23impl<Key, ValBuild, ValProbe> Default for HalfMultisetJoinState<Key, ValBuild, ValProbe> {
24fn default() -> Self {
25Self {
26 table: HashMap::default(),
27 current_matches: VecDeque::default(),
28 len: 0,
29 }
30 }
31}
32impl<Key, ValBuild, ValProbe> Clear for HalfMultisetJoinState<Key, ValBuild, ValProbe> {
33fn clear(&mut self) {
34self.table.clear();
35self.current_matches.clear();
36self.len = 0;
37 }
38}
39impl<Key, ValBuild, ValProbe> HalfJoinState<Key, ValBuild, ValProbe>
40for HalfMultisetJoinState<Key, ValBuild, ValProbe>
41where
42Key: Clone + Eq + std::hash::Hash,
43 ValBuild: Clone,
44 ValProbe: Clone,
45{
46fn build(&mut self, k: Key, v: &ValBuild) -> bool {
47let entry = self.table.entry(k);
4849match entry {
50 Entry::Occupied(mut e) => {
51let vec = e.get_mut();
5253 vec.push(v.clone());
54self.len += 1;
55 }
56 Entry::Vacant(e) => {
57 e.insert(smallvec![v.clone()]);
58self.len += 1;
59 }
60 };
6162true
63}
6465fn probe(&mut self, k: &Key, v: &ValProbe) -> Option<(Key, ValProbe, ValBuild)> {
66// TODO: We currently don't free/shrink the self.current_matches vecdeque to save time.
67 // This mean it will grow to eventually become the largest number of matches in a single probe call.
68 // Maybe we should clear this memory at the beginning of every tick/periodically?
69let mut iter = self
70.table
71 .get(k)?
72.iter()
73 .map(|valbuild| (k.clone(), v.clone(), valbuild.clone()));
7475let first = iter.next();
7677self.current_matches.extend(iter);
7879 first
80 }
8182fn full_probe(&self, k: &Key) -> std::slice::Iter<'_, ValBuild> {
83let Some(sv) = self.table.get(k) else {
84return [].iter();
85 };
8687 sv.iter()
88 }
8990fn pop_match(&mut self) -> Option<(Key, ValProbe, ValBuild)> {
91self.current_matches.pop_front()
92 }
9394fn len(&self) -> usize {
95self.len
96 }
97fn iter(&self) -> std::collections::hash_map::Iter<'_, Key, SmallVec<[ValBuild; 1]>> {
98self.table.iter()
99 }
100}