Skip to main content

lattices/ght/
mod.rs

1//! GHT from the Wang/Willsey/Suciu Freejoin work
2use std::collections::HashMap;
3use std::fmt::Debug;
4use std::hash::Hash;
5use std::marker::PhantomData;
6
7use variadics::variadic_collections::VariadicCollection;
8use variadics::{
9    PartialEqVariadic, RefVariadic, Split, SplitBySuffix, VariadicExt, var_args, var_type,
10};
11
12pub mod colt;
13pub mod lattice;
14pub mod macros;
15pub mod test;
16
17/// The GeneralizedHashTrieNode trait captures the properties of nodes in a Ght.
18///
19/// The Ght, defined by Wang/Willsey/Suciu, is a hash-based trie for storing tuples.
20/// It is parameterized by an ordered schema [`VariadicExt`] of the relation stored in the trie.
21/// It is a tree of [`GhtInner`] nodes, with [`GhtLeaf`] nodes at the leaves.
22/// The trie is keyed on a prefix of the schema [`Self::KeyType`],
23/// and the remaining columns [`Self::ValType`] are stored in the leaf.
24/// All leaf nodes use the same `[Self::Storage]` type to store the data.
25pub trait GeneralizedHashTrieNode: Default {
26    // types that are the same in all nodes of the trie
27    /// Schema variadic: the schema of the relation stored in this trie.
28    type Schema: VariadicExt + Eq + Hash + Clone + SplitBySuffix<Self::SuffixSchema>;
29    /// The prefix of columns in [`Self::Schema`] that the trie is keyed on
30    type KeyType: VariadicExt + Eq + Hash + Clone;
31    /// The suffix of columns in [`Self::Schema`] that are not part of the trie keys
32    type ValType: VariadicExt + Eq + Hash + Clone;
33    /// The type that holds the data in the leaves
34    type Storage: VariadicCollection<Schema = Self::Schema>
35        + Default
36        + IntoIterator<Item = Self::Schema>;
37
38    // types that vary per node
39    /// SuffixSchema variadic: the suffix of [`Self::Schema`] from this node of the trie
40    /// downward. The first entry in this variadic is of type [`Self::Head`].
41    type SuffixSchema: VariadicExt + Eq + Hash + Clone;
42    /// The first field in [`Self::SuffixSchema`], and the key for the next node in the trie.
43    type Head: Eq + Hash + Clone;
44
45    /// Create a new Ght from the iterator.
46    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self;
47
48    /// Merge a matching Ght node into this node
49    fn merge_node(&mut self, other: Self) -> bool;
50
51    /// Report the height of this node. This is the length of path from this node to a leaf - 1.
52    /// E.g. if we have GhtInner<GhtInner<GhtLeaf...>> the height is 2
53    /// This is a static property of the type of this node, so simply invokes the static method.
54    fn height(&self) -> usize {
55        Self::HEIGHT
56    }
57
58    /// The height of this node in the GhT. Leaf = 0.
59    const HEIGHT: usize;
60
61    /// Inserts an item into the hash trie.
62    fn insert(&mut self, row: Self::Schema) -> bool;
63
64    /// Returns `true` if the (entire) row is found below in the trie, `false` otherwise.
65    /// See [`GhtGet::get`] to look just for "head" keys in this node
66    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool;
67
68    /// Iterate through (entire) rows stored in this HashTrie.
69    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
70
71    /// return the leaf below that contains this row, or `None` if not found.
72    fn find_containing_leaf(
73        &self,
74        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
75    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>>;
76
77    /// into_iter for leaf elements, or None for inner nodes
78    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>>;
79
80    /// pull all the data out of this trie node but retain the reference
81    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>>;
82}
83
84/// internal node of a HashTrie
85#[derive(Debug, Clone)]
86pub struct GhtInner<Head, Node>
87where
88    Head: Clone,
89    Node: GeneralizedHashTrieNode,
90{
91    pub(crate) children: HashMap<Head, Node>,
92}
93
94impl<Head, Node: GeneralizedHashTrieNode> Default for GhtInner<Head, Node>
95where
96    Head: Clone,
97    Node: GeneralizedHashTrieNode,
98{
99    fn default() -> Self {
100        let children = Default::default();
101        Self { children }
102    }
103}
104
105impl<Head, Node> GeneralizedHashTrieNode for GhtInner<Head, Node>
106where
107    Head: 'static + Hash + Eq + Clone,
108    Node: 'static + GeneralizedHashTrieNode,
109    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
110{
111    type Schema = Node::Schema;
112    type KeyType = Node::KeyType;
113    type ValType = Node::ValType;
114    type Storage = Node::Storage;
115    type SuffixSchema = var_type!(Head, ...Node::SuffixSchema);
116    type Head = Head;
117
118    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
119        let mut retval: Self = Default::default();
120        for row in input {
121            retval.insert(row);
122        }
123        retval
124    }
125
126    fn merge_node(&mut self, other: Self) -> bool {
127        let mut changed = false;
128
129        for (k, v) in other.children {
130            match self.children.entry(k) {
131                std::collections::hash_map::Entry::Occupied(mut occupied) => {
132                    changed |= occupied.get_mut().merge_node(v)
133                }
134                std::collections::hash_map::Entry::Vacant(vacant) => {
135                    vacant.insert(v);
136                    changed = true
137                }
138            }
139        }
140        changed
141    }
142
143    const HEIGHT: usize = Node::HEIGHT + 1;
144
145    fn insert(&mut self, row: Self::Schema) -> bool {
146        let (_prefix, var_args!(head, ..._rest)) =
147            Self::Schema::split_by_suffix_ref(row.as_ref_var());
148        self.children.entry(head.clone()).or_default().insert(row)
149    }
150
151    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
152        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
153        if let Some(node) = self.children.get(head) {
154            node.contains(row)
155        } else {
156            false
157        }
158    }
159
160    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
161        #[expect(
162            clippy::disallowed_methods,
163            reason = "nondeterministic iteration order, TODO(mingwei)"
164        )]
165        self.children
166            .iter()
167            .flat_map(|(_k, vs)| vs.recursive_iter())
168    }
169
170    fn find_containing_leaf(
171        &self,
172        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
173    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>> {
174        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
175        self.children
176            .get(head)
177            .and_then(|child| child.find_containing_leaf(row))
178    }
179
180    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
181        None::<Box<dyn Iterator<Item = Self::Schema>>>
182    }
183
184    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
185        None::<Box<dyn Iterator<Item = Self::Schema>>>
186    }
187}
188
189impl<Head, Node> FromIterator<Node::Schema> for GhtInner<Head, Node>
190where
191    Head: 'static + Hash + Eq + Clone,
192    Node: 'static + GeneralizedHashTrieNode + Clone,
193    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
194{
195    fn from_iter<Iter: IntoIterator<Item = Node::Schema>>(iter: Iter) -> Self {
196        let mut out = Self::default();
197        for row in iter {
198            out.insert(row);
199        }
200        out
201    }
202}
203
204/// leaf node of a HashTrie
205#[derive(Debug, PartialEq, Eq, Clone)]
206pub struct GhtLeaf<Schema, ValType, Storage>
207where
208    Schema: Eq + Hash,
209    Storage: VariadicCollection<Schema = Schema>,
210{
211    pub(crate) elements: Storage,
212    pub(crate) forced: bool,
213    /// defines ValType for the parents, recursively
214    pub(crate) _suffix_schema: PhantomData<ValType>,
215}
216impl<Schema, ValType, Storage> Default for GhtLeaf<Schema, ValType, Storage>
217where
218    Schema: Eq + Hash,
219    Storage: VariadicCollection<Schema = Schema> + Default,
220{
221    fn default() -> Self {
222        let elements = Storage::default();
223        Self {
224            elements,
225            forced: false,
226            _suffix_schema: PhantomData,
227        }
228    }
229}
230
231impl<Schema, ValHead, ValRest, Storage> GeneralizedHashTrieNode
232    for GhtLeaf<Schema, var_type!(ValHead, ...ValRest), Storage>
233where
234    Schema: 'static
235        + Eq
236        + VariadicExt
237        + Hash
238        + Clone
239        + SplitBySuffix<var_type!(ValHead, ...ValRest)>
240        + PartialEqVariadic,
241    ValHead: Clone + Eq + Hash,
242    var_type!(ValHead, ...ValRest): Clone + Eq + Hash + PartialEqVariadic,
243    <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix: Eq + Hash + Clone,
244    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
245{
246    type Schema = Schema;
247    type SuffixSchema = var_type!(ValHead, ...ValRest);
248    type ValType = var_type!(ValHead, ...ValRest);
249    type KeyType = <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix;
250    type Head = ValHead;
251    type Storage = Storage;
252
253    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
254        let mut retval: Self = Default::default();
255        for i in input {
256            retval.insert(i);
257        }
258        retval
259    }
260
261    fn merge_node(&mut self, other: Self) -> bool {
262        let old_len = self.elements.len();
263        self.elements.extend(other.elements);
264        self.elements.len() > old_len
265    }
266
267    const HEIGHT: usize = 0;
268
269    fn insert(&mut self, row: Self::Schema) -> bool {
270        self.elements.insert(row);
271        true
272    }
273
274    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
275        self.elements.iter().any(|r| Schema::eq_ref(r, row))
276    }
277
278    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
279        self.elements.iter()
280    }
281
282    fn find_containing_leaf(
283        &self,
284        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
285    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
286    {
287        // TODO(mingwei): actually use the hash set as a hash set
288        if self
289            .elements
290            .iter()
291            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
292        {
293            Some(self)
294        } else {
295            None
296        }
297    }
298
299    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
300        Some(self.elements.into_iter())
301    }
302
303    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
304        Some(self.elements.drain())
305    }
306}
307
308impl<Schema, Storage> GeneralizedHashTrieNode for GhtLeaf<Schema, (), Storage>
309where
310    Schema: 'static + Eq + VariadicExt + Hash + Clone + PartialEqVariadic,
311    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
312{
313    type Schema = Schema;
314    type SuffixSchema = ();
315    type ValType = ();
316    type KeyType = Schema;
317    type Head = ();
318    type Storage = Storage;
319
320    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
321        let mut retval: Self = Default::default();
322        for i in input {
323            retval.insert(i);
324        }
325        retval
326    }
327
328    fn merge_node(&mut self, other: Self) -> bool {
329        let old_len = self.elements.len();
330        self.elements.extend(other.elements);
331        self.elements.len() > old_len
332    }
333
334    const HEIGHT: usize = 0;
335
336    fn insert(&mut self, row: Self::Schema) -> bool {
337        self.elements.insert(row);
338        true
339    }
340
341    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
342        self.elements.iter().any(|r| Schema::eq_ref(r, row))
343    }
344
345    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
346        self.elements.iter()
347    }
348
349    fn find_containing_leaf(
350        &self,
351        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
352    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
353    {
354        // TODO(mingwei): actually use the hash set as a hash set
355        if self
356            .elements
357            .iter()
358            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
359        {
360            Some(self)
361        } else {
362            None
363        }
364    }
365
366    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
367        Some(self.elements.into_iter())
368    }
369
370    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
371        Some(self.elements.drain())
372    }
373}
374
375impl<Schema, ValType, Storage> FromIterator<Schema> for GhtLeaf<Schema, ValType, Storage>
376where
377    Schema: Eq + Hash,
378    Storage: VariadicCollection<Schema = Schema> + Default + FromIterator<Schema>,
379{
380    fn from_iter<Iter: IntoIterator<Item = Schema>>(iter: Iter) -> Self {
381        let elements = iter.into_iter().collect();
382        Self {
383            elements,
384            forced: false,
385            _suffix_schema: PhantomData,
386        }
387    }
388}
389
390/// A trait for the get and iter methods from Wang/Willsey/Suciu, which
391/// work differently on leaves than internal nodes
392pub trait GhtGet: GeneralizedHashTrieNode {
393    /// Type returned by [`Self::get`].
394    type Get: GeneralizedHashTrieNode<Schema = Self::Schema>;
395
396    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
397    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
398    /// On a Leaf node, returns None.
399    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get>;
400
401    /// get, but mutable output
402    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get>;
403
404    /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes).
405    fn iter(&self) -> impl Iterator<Item = Self::Head>;
406
407    /// Iterator for the tuples (from leaf nodes) or nothing (from inner nodes).
408    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
409}
410
411impl<Head, Node> GhtGet for GhtInner<Head, Node>
412where
413    Head: 'static + Eq + Hash + Clone,
414    Node: 'static + GeneralizedHashTrieNode,
415    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
416{
417    /// Type returned by [`Self::get`].
418    type Get = Node;
419
420    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
421    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
422    /// On a Leaf node, returns None.
423    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get> {
424        self.children.get(head)
425    }
426
427    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get> {
428        self.children.get_mut(head)
429    }
430
431    fn iter(&self) -> impl Iterator<Item = Self::Head> {
432        #[expect(
433            clippy::disallowed_methods,
434            reason = "nondeterministic iteration order, TODO(mingwei)"
435        )]
436        self.children.keys().cloned()
437    }
438
439    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
440        std::iter::empty()
441    }
442}
443
444impl<Schema, ValType, Storage> GhtGet for GhtLeaf<Schema, ValType, Storage>
445where
446    Schema: 'static + Eq + Hash + Clone + PartialEqVariadic + SplitBySuffix<ValType>,
447    ValType: Eq + Hash + Clone + PartialEqVariadic,
448    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
449    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema>,
450    Storage: VariadicCollection<Schema = Schema>,
451{
452    /// Type returned by [`Self::get`].
453    type Get = GhtLeaf<Schema, ValType, Storage>;
454
455    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
456    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
457    /// On a Leaf node, returns None.
458    fn get<'a>(&'a self, _head: &Self::Head) -> Option<&'a Self::Get> {
459        None
460    }
461    fn get_mut<'a>(&'a mut self, _head: &Self::Head) -> Option<&'a mut Self::Get> {
462        None
463    }
464
465    fn iter(&self) -> impl Iterator<Item = Self::Head> {
466        std::iter::empty()
467    }
468
469    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
470        self.elements.iter()
471    }
472}
473
474/// A trait to iterate through the items in a Ght based on a prefix of the schema.
475pub trait GhtPrefixIter<KeyPrefix> {
476    /// the schema output
477    type Item: VariadicExt;
478    /// given a prefix, return an iterator through the items below
479    fn prefix_iter<'a>(
480        &'a self,
481        prefix: KeyPrefix,
482    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
483    where
484        Self::Item: 'a;
485}
486
487impl<'k, Head, Node, PrefixRest> GhtPrefixIter<var_type!(&'k Head, ...PrefixRest)>
488    for GhtInner<Head, Node>
489where
490    Head: Eq + Hash + Clone,
491    Node: GeneralizedHashTrieNode + GhtPrefixIter<PrefixRest>,
492{
493    type Item = <Node as GhtPrefixIter<PrefixRest>>::Item;
494    fn prefix_iter<'a>(
495        &'a self,
496        prefix: var_type!(&'k Head, ...PrefixRest),
497    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
498    where
499        Self::Item: 'a,
500    {
501        let var_args!(head, ...rest) = prefix;
502        self.children
503            .get(head)
504            .map(|node| node.prefix_iter(rest))
505            .into_iter()
506            .flatten()
507    }
508}
509impl<Head, Node> GhtPrefixIter<var_type!()> for GhtInner<Head, Node>
510where
511    Self: GeneralizedHashTrieNode,
512    Head: Eq + Hash + Clone,
513    Node: GeneralizedHashTrieNode,
514{
515    type Item = <Self as GeneralizedHashTrieNode>::Schema;
516    fn prefix_iter<'a>(
517        &'a self,
518        _prefix: var_type!(),
519    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
520    where
521        Self::Item: 'a,
522    {
523        self.recursive_iter()
524    }
525}
526
527impl<KeyPrefixRef, Schema, ValType, Storage> GhtPrefixIter<KeyPrefixRef>
528    for GhtLeaf<Schema, ValType, Storage>
529where
530    KeyPrefixRef: 'static + RefVariadic,
531    Schema: 'static + VariadicExt + Hash + Eq + SplitBySuffix<ValType>,
532    ValType: VariadicExt,
533    ValType: Split<KeyPrefixRef::UnRefVar>,
534    KeyPrefixRef::UnRefVar: PartialEqVariadic,
535    Storage: 'static + VariadicCollection<Schema = Schema>,
536{
537    type Item = Schema;
538    fn prefix_iter<'a>(
539        &'a self,
540        prefix: KeyPrefixRef,
541    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
542    where
543        Self::Item: 'a,
544    {
545        self.elements.iter().filter(move |&row| {
546            let (_row_prefix, row_mid_suffix) =
547                <Schema as SplitBySuffix<ValType>>::split_by_suffix_ref(row);
548            let (row_mid, _row_suffix): (<KeyPrefixRef::UnRefVar as VariadicExt>::AsRefVar<'_>, _) =
549                <ValType as Split<KeyPrefixRef::UnRefVar>>::split_ref(row_mid_suffix);
550            <KeyPrefixRef::UnRefVar as PartialEqVariadic>::eq_ref(prefix.unref_ref(), row_mid)
551        })
552    }
553}