Skip to main content

lattices/ght/
mod.rs

1//! GHT from the Wang/Willsey/Suciu Freejoin work
2use std::collections::HashMap;
3use std::fmt::Debug;
4use std::hash::Hash;
5use std::marker::PhantomData;
6
7use variadics::variadic_collections::VariadicCollection;
8use variadics::{
9    PartialEqVariadic, RefVariadic, Split, SplitBySuffix, VariadicExt, var_args, var_type,
10};
11
12pub mod colt;
13pub mod lattice;
14pub mod macros;
15pub mod test;
16
17/// The GeneralizedHashTrieNode trait captures the properties of nodes in a Ght.
18///
19/// The Ght, defined by Wang/Willsey/Suciu, is a hash-based trie for storing tuples.
20/// It is parameterized by an ordered schema [`VariadicExt`] of the relation stored in the trie.
21/// It is a tree of [`GhtInner`] nodes, with [`GhtLeaf`] nodes at the leaves.
22/// The trie is keyed on a prefix of the schema [`Self::KeyType`],
23/// and the remaining columns [`Self::ValType`] are stored in the leaf.
24/// All leaf nodes use the same `[Self::Storage]` type to store the data.
25pub trait GeneralizedHashTrieNode: Default {
26    // types that are the same in all nodes of the trie
27    /// Schema variadic: the schema of the relation stored in this trie.
28    type Schema: VariadicExt + Eq + Hash + Clone + SplitBySuffix<Self::SuffixSchema>;
29    /// The prefix of columns in [`Self::Schema`] that the trie is keyed on
30    type KeyType: VariadicExt + Eq + Hash + Clone;
31    /// The suffix of columns in [`Self::Schema`] that are not part of the trie keys
32    type ValType: VariadicExt + Eq + Hash + Clone;
33    /// The type that holds the data in the leaves
34    type Storage: VariadicCollection<Schema = Self::Schema>
35        + Default
36        + IntoIterator<Item = Self::Schema>;
37
38    // types that vary per node
39    /// SuffixSchema variadic: the suffix of [`Self::Schema`] from this node of the trie
40    /// downward. The first entry in this variadic is of type [`Self::Head`].
41    type SuffixSchema: VariadicExt + Eq + Hash + Clone;
42    /// The first field in [`Self::SuffixSchema`], and the key for the next node in the trie.
43    type Head: Eq + Hash + Clone;
44
45    /// Create a new Ght from the iterator.
46    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self;
47
48    /// Merge a matching Ght node into this node
49    fn merge_node(&mut self, other: Self) -> bool;
50
51    /// Report the height of this node. This is the length of path from this node to a leaf - 1.
52    /// E.g. if we have GhtInner<GhtInner<GhtLeaf...>> the height is 2
53    /// This is a static property of the type of this node, so simply invokes the static method.
54    fn height(&self) -> usize {
55        Self::HEIGHT
56    }
57
58    /// The height of this node in the GhT. Leaf = 0.
59    const HEIGHT: usize;
60
61    /// Inserts an item into the hash trie.
62    fn insert(&mut self, row: Self::Schema) -> bool;
63
64    /// Returns `true` if the (entire) row is found below in the trie, `false` otherwise.
65    /// See [`GhtGet::get`] to look just for "head" keys in this node
66    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool;
67
68    /// Iterate through (entire) rows stored in this HashTrie.
69    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
70
71    /// return the leaf below that contains this row, or `None` if not found.
72    fn find_containing_leaf(
73        &self,
74        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
75    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>>;
76
77    /// into_iter for leaf elements, or None for inner nodes
78    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>>;
79
80    /// pull all the data out of this trie node but retain the reference
81    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>>;
82}
83
84/// internal node of a HashTrie
85#[derive(Debug, Clone)]
86pub struct GhtInner<Head, Node>
87where
88    Head: Clone,
89    Node: GeneralizedHashTrieNode,
90{
91    pub(crate) children: HashMap<Head, Node>,
92}
93
94impl<Head, Node: GeneralizedHashTrieNode> Default for GhtInner<Head, Node>
95where
96    Head: Clone,
97    Node: GeneralizedHashTrieNode,
98{
99    fn default() -> Self {
100        let children = Default::default();
101        Self { children }
102    }
103}
104
105impl<Head, Node> GeneralizedHashTrieNode for GhtInner<Head, Node>
106where
107    Head: 'static + Hash + Eq + Clone,
108    Node: 'static + GeneralizedHashTrieNode,
109    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
110{
111    type Schema = Node::Schema;
112    type KeyType = Node::KeyType;
113    type ValType = Node::ValType;
114    type Storage = Node::Storage;
115    type SuffixSchema = var_type!(Head, ...Node::SuffixSchema);
116    type Head = Head;
117
118    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
119        let mut retval: Self = Default::default();
120        for row in input {
121            retval.insert(row);
122        }
123        retval
124    }
125
126    fn merge_node(&mut self, other: Self) -> bool {
127        let mut changed = false;
128
129        for (k, v) in other.children {
130            match self.children.entry(k) {
131                std::collections::hash_map::Entry::Occupied(mut occupied) => {
132                    changed |= occupied.get_mut().merge_node(v)
133                }
134                std::collections::hash_map::Entry::Vacant(vacant) => {
135                    vacant.insert(v);
136                    changed = true
137                }
138            }
139        }
140        changed
141    }
142
143    const HEIGHT: usize = Node::HEIGHT + 1;
144
145    fn insert(&mut self, row: Self::Schema) -> bool {
146        let (_prefix, var_args!(head, ..._rest)) =
147            Self::Schema::split_by_suffix_ref(row.as_ref_var());
148        self.children.entry(head.clone()).or_default().insert(row)
149    }
150
151    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
152        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
153        if let Some(node) = self.children.get(head) {
154            node.contains(row)
155        } else {
156            false
157        }
158    }
159
160    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
161        #[expect(
162            clippy::disallowed_methods,
163            reason = "nondeterministic iteration order, TODO(mingwei)"
164        )]
165        self.children.values().flat_map(|vs| vs.recursive_iter())
166    }
167
168    fn find_containing_leaf(
169        &self,
170        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
171    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>> {
172        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
173        self.children
174            .get(head)
175            .and_then(|child| child.find_containing_leaf(row))
176    }
177
178    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
179        None::<Box<dyn Iterator<Item = Self::Schema>>>
180    }
181
182    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
183        None::<Box<dyn Iterator<Item = Self::Schema>>>
184    }
185}
186
187impl<Head, Node> FromIterator<Node::Schema> for GhtInner<Head, Node>
188where
189    Head: 'static + Hash + Eq + Clone,
190    Node: 'static + GeneralizedHashTrieNode + Clone,
191    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
192{
193    fn from_iter<Iter: IntoIterator<Item = Node::Schema>>(iter: Iter) -> Self {
194        let mut out = Self::default();
195        for row in iter {
196            out.insert(row);
197        }
198        out
199    }
200}
201
202/// leaf node of a HashTrie
203#[derive(Debug, PartialEq, Eq, Clone)]
204pub struct GhtLeaf<Schema, ValType, Storage>
205where
206    Schema: Eq + Hash,
207    Storage: VariadicCollection<Schema = Schema>,
208{
209    pub(crate) elements: Storage,
210    pub(crate) forced: bool,
211    /// defines ValType for the parents, recursively
212    pub(crate) _suffix_schema: PhantomData<ValType>,
213}
214impl<Schema, ValType, Storage> Default for GhtLeaf<Schema, ValType, Storage>
215where
216    Schema: Eq + Hash,
217    Storage: VariadicCollection<Schema = Schema> + Default,
218{
219    fn default() -> Self {
220        let elements = Storage::default();
221        Self {
222            elements,
223            forced: false,
224            _suffix_schema: PhantomData,
225        }
226    }
227}
228
229impl<Schema, ValHead, ValRest, Storage> GeneralizedHashTrieNode
230    for GhtLeaf<Schema, var_type!(ValHead, ...ValRest), Storage>
231where
232    Schema: 'static
233        + Eq
234        + VariadicExt
235        + Hash
236        + Clone
237        + SplitBySuffix<var_type!(ValHead, ...ValRest)>
238        + PartialEqVariadic,
239    ValHead: Clone + Eq + Hash,
240    var_type!(ValHead, ...ValRest): Clone + Eq + Hash + PartialEqVariadic,
241    <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix: Eq + Hash + Clone,
242    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
243{
244    type Schema = Schema;
245    type SuffixSchema = var_type!(ValHead, ...ValRest);
246    type ValType = var_type!(ValHead, ...ValRest);
247    type KeyType = <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix;
248    type Head = ValHead;
249    type Storage = Storage;
250
251    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
252        let mut retval: Self = Default::default();
253        for i in input {
254            retval.insert(i);
255        }
256        retval
257    }
258
259    fn merge_node(&mut self, other: Self) -> bool {
260        let old_len = self.elements.len();
261        self.elements.extend(other.elements);
262        self.elements.len() > old_len
263    }
264
265    const HEIGHT: usize = 0;
266
267    fn insert(&mut self, row: Self::Schema) -> bool {
268        self.elements.insert(row);
269        true
270    }
271
272    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
273        self.elements.iter().any(|r| Schema::eq_ref(r, row))
274    }
275
276    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
277        self.elements.iter()
278    }
279
280    fn find_containing_leaf(
281        &self,
282        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
283    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
284    {
285        // TODO(mingwei): actually use the hash set as a hash set
286        if self
287            .elements
288            .iter()
289            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
290        {
291            Some(self)
292        } else {
293            None
294        }
295    }
296
297    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
298        Some(self.elements.into_iter())
299    }
300
301    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
302        Some(self.elements.drain())
303    }
304}
305
306impl<Schema, Storage> GeneralizedHashTrieNode for GhtLeaf<Schema, (), Storage>
307where
308    Schema: 'static + Eq + VariadicExt + Hash + Clone + PartialEqVariadic,
309    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
310{
311    type Schema = Schema;
312    type SuffixSchema = ();
313    type ValType = ();
314    type KeyType = Schema;
315    type Head = ();
316    type Storage = Storage;
317
318    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
319        let mut retval: Self = Default::default();
320        for i in input {
321            retval.insert(i);
322        }
323        retval
324    }
325
326    fn merge_node(&mut self, other: Self) -> bool {
327        let old_len = self.elements.len();
328        self.elements.extend(other.elements);
329        self.elements.len() > old_len
330    }
331
332    const HEIGHT: usize = 0;
333
334    fn insert(&mut self, row: Self::Schema) -> bool {
335        self.elements.insert(row);
336        true
337    }
338
339    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
340        self.elements.iter().any(|r| Schema::eq_ref(r, row))
341    }
342
343    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
344        self.elements.iter()
345    }
346
347    fn find_containing_leaf(
348        &self,
349        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
350    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
351    {
352        // TODO(mingwei): actually use the hash set as a hash set
353        if self
354            .elements
355            .iter()
356            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
357        {
358            Some(self)
359        } else {
360            None
361        }
362    }
363
364    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
365        Some(self.elements.into_iter())
366    }
367
368    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
369        Some(self.elements.drain())
370    }
371}
372
373impl<Schema, ValType, Storage> FromIterator<Schema> for GhtLeaf<Schema, ValType, Storage>
374where
375    Schema: Eq + Hash,
376    Storage: VariadicCollection<Schema = Schema> + Default + FromIterator<Schema>,
377{
378    fn from_iter<Iter: IntoIterator<Item = Schema>>(iter: Iter) -> Self {
379        let elements = iter.into_iter().collect();
380        Self {
381            elements,
382            forced: false,
383            _suffix_schema: PhantomData,
384        }
385    }
386}
387
388/// A trait for the get and iter methods from Wang/Willsey/Suciu, which
389/// work differently on leaves than internal nodes
390pub trait GhtGet: GeneralizedHashTrieNode {
391    /// Type returned by [`Self::get`].
392    type Get: GeneralizedHashTrieNode<Schema = Self::Schema>;
393
394    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
395    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
396    /// On a Leaf node, returns None.
397    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get>;
398
399    /// get, but mutable output
400    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get>;
401
402    /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes).
403    fn iter(&self) -> impl Iterator<Item = Self::Head>;
404
405    /// Iterator for the tuples (from leaf nodes) or nothing (from inner nodes).
406    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
407}
408
409impl<Head, Node> GhtGet for GhtInner<Head, Node>
410where
411    Head: 'static + Eq + Hash + Clone,
412    Node: 'static + GeneralizedHashTrieNode,
413    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
414{
415    /// Type returned by [`Self::get`].
416    type Get = Node;
417
418    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
419    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
420    /// On a Leaf node, returns None.
421    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get> {
422        self.children.get(head)
423    }
424
425    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get> {
426        self.children.get_mut(head)
427    }
428
429    fn iter(&self) -> impl Iterator<Item = Self::Head> {
430        #[expect(
431            clippy::disallowed_methods,
432            reason = "nondeterministic iteration order, TODO(mingwei)"
433        )]
434        self.children.keys().cloned()
435    }
436
437    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
438        std::iter::empty()
439    }
440}
441
442impl<Schema, ValType, Storage> GhtGet for GhtLeaf<Schema, ValType, Storage>
443where
444    Schema: 'static + Eq + Hash + Clone + PartialEqVariadic + SplitBySuffix<ValType>,
445    ValType: Eq + Hash + Clone + PartialEqVariadic,
446    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
447    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema>,
448    Storage: VariadicCollection<Schema = Schema>,
449{
450    /// Type returned by [`Self::get`].
451    type Get = GhtLeaf<Schema, ValType, Storage>;
452
453    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
454    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
455    /// On a Leaf node, returns None.
456    fn get<'a>(&'a self, _head: &Self::Head) -> Option<&'a Self::Get> {
457        None
458    }
459    fn get_mut<'a>(&'a mut self, _head: &Self::Head) -> Option<&'a mut Self::Get> {
460        None
461    }
462
463    fn iter(&self) -> impl Iterator<Item = Self::Head> {
464        std::iter::empty()
465    }
466
467    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
468        self.elements.iter()
469    }
470}
471
472/// A trait to iterate through the items in a Ght based on a prefix of the schema.
473pub trait GhtPrefixIter<KeyPrefix> {
474    /// the schema output
475    type Item: VariadicExt;
476    /// given a prefix, return an iterator through the items below
477    fn prefix_iter<'a>(
478        &'a self,
479        prefix: KeyPrefix,
480    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
481    where
482        Self::Item: 'a;
483}
484
485impl<'k, Head, Node, PrefixRest> GhtPrefixIter<var_type!(&'k Head, ...PrefixRest)>
486    for GhtInner<Head, Node>
487where
488    Head: Eq + Hash + Clone,
489    Node: GeneralizedHashTrieNode + GhtPrefixIter<PrefixRest>,
490{
491    type Item = <Node as GhtPrefixIter<PrefixRest>>::Item;
492    fn prefix_iter<'a>(
493        &'a self,
494        prefix: var_type!(&'k Head, ...PrefixRest),
495    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
496    where
497        Self::Item: 'a,
498    {
499        let var_args!(head, ...rest) = prefix;
500        self.children
501            .get(head)
502            .map(|node| node.prefix_iter(rest))
503            .into_iter()
504            .flatten()
505    }
506}
507impl<Head, Node> GhtPrefixIter<var_type!()> for GhtInner<Head, Node>
508where
509    Self: GeneralizedHashTrieNode,
510    Head: Eq + Hash + Clone,
511    Node: GeneralizedHashTrieNode,
512{
513    type Item = <Self as GeneralizedHashTrieNode>::Schema;
514    fn prefix_iter<'a>(
515        &'a self,
516        _prefix: var_type!(),
517    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
518    where
519        Self::Item: 'a,
520    {
521        self.recursive_iter()
522    }
523}
524
525impl<KeyPrefixRef, Schema, ValType, Storage> GhtPrefixIter<KeyPrefixRef>
526    for GhtLeaf<Schema, ValType, Storage>
527where
528    KeyPrefixRef: 'static + RefVariadic,
529    Schema: 'static + VariadicExt + Hash + Eq + SplitBySuffix<ValType>,
530    ValType: VariadicExt,
531    ValType: Split<KeyPrefixRef::UnRefVar>,
532    KeyPrefixRef::UnRefVar: PartialEqVariadic,
533    Storage: 'static + VariadicCollection<Schema = Schema>,
534{
535    type Item = Schema;
536    fn prefix_iter<'a>(
537        &'a self,
538        prefix: KeyPrefixRef,
539    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
540    where
541        Self::Item: 'a,
542    {
543        self.elements.iter().filter(move |&row| {
544            let (_row_prefix, row_mid_suffix) =
545                <Schema as SplitBySuffix<ValType>>::split_by_suffix_ref(row);
546            let (row_mid, _row_suffix): (<KeyPrefixRef::UnRefVar as VariadicExt>::AsRefVar<'_>, _) =
547                <ValType as Split<KeyPrefixRef::UnRefVar>>::split_ref(row_mid_suffix);
548            <KeyPrefixRef::UnRefVar as PartialEqVariadic>::eq_ref(prefix.unref_ref(), row_mid)
549        })
550    }
551}