Skip to main content

lattices/ght/
mod.rs

1//! GHT from the Wang/Willsey/Suciu Freejoin work
2use std::boxed::Box;
3use std::collections::HashMap;
4use std::fmt::Debug;
5use std::hash::Hash;
6use std::marker::PhantomData;
7
8use variadics::variadic_collections::VariadicCollection;
9use variadics::{
10    PartialEqVariadic, RefVariadic, Split, SplitBySuffix, VariadicExt, var_args, var_type,
11};
12
13pub mod colt;
14pub mod lattice;
15pub mod macros;
16pub mod test;
17
18/// The GeneralizedHashTrieNode trait captures the properties of nodes in a Ght.
19///
20/// The Ght, defined by Wang/Willsey/Suciu, is a hash-based trie for storing tuples.
21/// It is parameterized by an ordered schema [`VariadicExt`] of the relation stored in the trie.
22/// It is a tree of [`GhtInner`] nodes, with [`GhtLeaf`] nodes at the leaves.
23/// The trie is keyed on a prefix of the schema [`Self::KeyType`],
24/// and the remaining columns [`Self::ValType`] are stored in the leaf.
25/// All leaf nodes use the same `[Self::Storage]` type to store the data.
26pub trait GeneralizedHashTrieNode: Default {
27    // types that are the same in all nodes of the trie
28    /// Schema variadic: the schema of the relation stored in this trie.
29    type Schema: VariadicExt + Eq + Hash + Clone + SplitBySuffix<Self::SuffixSchema>;
30    /// The prefix of columns in [`Self::Schema`] that the trie is keyed on
31    type KeyType: VariadicExt + Eq + Hash + Clone;
32    /// The suffix of columns in [`Self::Schema`] that are not part of the trie keys
33    type ValType: VariadicExt + Eq + Hash + Clone;
34    /// The type that holds the data in the leaves
35    type Storage: VariadicCollection<Schema = Self::Schema>
36        + Default
37        + IntoIterator<Item = Self::Schema>;
38
39    // types that vary per node
40    /// SuffixSchema variadic: the suffix of [`Self::Schema`] from this node of the trie
41    /// downward. The first entry in this variadic is of type [`Self::Head`].
42    type SuffixSchema: VariadicExt + Eq + Hash + Clone;
43    /// The first field in [`Self::SuffixSchema`], and the key for the next node in the trie.
44    type Head: Eq + Hash + Clone;
45
46    /// Create a new Ght from the iterator.
47    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self;
48
49    /// Merge a matching Ght node into this node
50    fn merge_node(&mut self, other: Self) -> bool;
51
52    /// Report the height of this node. This is the length of path from this node to a leaf - 1.
53    /// E.g. if we have GhtInner<GhtInner<GhtLeaf...>> the height is 2
54    /// This is a static property of the type of this node, so simply invokes the static method.
55    fn height(&self) -> usize {
56        Self::HEIGHT
57    }
58
59    /// The height of this node in the GhT. Leaf = 0.
60    const HEIGHT: usize;
61
62    /// Inserts an item into the hash trie.
63    fn insert(&mut self, row: Self::Schema) -> bool;
64
65    /// Returns `true` if the (entire) row is found below in the trie, `false` otherwise.
66    /// See [`GhtGet::get`] to look just for "head" keys in this node
67    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool;
68
69    /// Iterate through (entire) rows stored in this HashTrie.
70    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
71
72    /// return the leaf below that contains this row, or `None` if not found.
73    fn find_containing_leaf(
74        &self,
75        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
76    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>>;
77
78    /// into_iter for leaf elements, or None for inner nodes
79    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>>;
80
81    /// pull all the data out of this trie node but retain the reference
82    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>>;
83}
84
85/// internal node of a HashTrie
86#[derive(Debug, Clone)]
87pub struct GhtInner<Head, Node>
88where
89    Head: Clone,
90    Node: GeneralizedHashTrieNode,
91{
92    pub(crate) children: HashMap<Head, Node>,
93}
94
95impl<Head, Node: GeneralizedHashTrieNode> Default for GhtInner<Head, Node>
96where
97    Head: Clone,
98    Node: GeneralizedHashTrieNode,
99{
100    fn default() -> Self {
101        let children = Default::default();
102        Self { children }
103    }
104}
105
106impl<Head, Node> GeneralizedHashTrieNode for GhtInner<Head, Node>
107where
108    Head: 'static + Hash + Eq + Clone,
109    Node: 'static + GeneralizedHashTrieNode,
110    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
111{
112    type Schema = Node::Schema;
113    type KeyType = Node::KeyType;
114    type ValType = Node::ValType;
115    type Storage = Node::Storage;
116    type SuffixSchema = var_type!(Head, ...Node::SuffixSchema);
117    type Head = Head;
118
119    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
120        let mut retval: Self = Default::default();
121        for row in input {
122            retval.insert(row);
123        }
124        retval
125    }
126
127    fn merge_node(&mut self, other: Self) -> bool {
128        let mut changed = false;
129
130        for (k, v) in other.children {
131            match self.children.entry(k) {
132                std::collections::hash_map::Entry::Occupied(mut occupied) => {
133                    changed |= occupied.get_mut().merge_node(v)
134                }
135                std::collections::hash_map::Entry::Vacant(vacant) => {
136                    vacant.insert(v);
137                    changed = true
138                }
139            }
140        }
141        changed
142    }
143
144    const HEIGHT: usize = Node::HEIGHT + 1;
145
146    fn insert(&mut self, row: Self::Schema) -> bool {
147        let (_prefix, var_args!(head, ..._rest)) =
148            Self::Schema::split_by_suffix_ref(row.as_ref_var());
149        self.children.entry(head.clone()).or_default().insert(row)
150    }
151
152    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
153        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
154        if let Some(node) = self.children.get(head) {
155            node.contains(row)
156        } else {
157            false
158        }
159    }
160
161    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
162        #[expect(
163            clippy::disallowed_methods,
164            reason = "nondeterministic iteration order, TODO(mingwei)"
165        )]
166        self.children.values().flat_map(|vs| vs.recursive_iter())
167    }
168
169    fn find_containing_leaf(
170        &self,
171        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
172    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>> {
173        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
174        self.children
175            .get(head)
176            .and_then(|child| child.find_containing_leaf(row))
177    }
178
179    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
180        None::<Box<dyn Iterator<Item = Self::Schema>>>
181    }
182
183    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
184        None::<Box<dyn Iterator<Item = Self::Schema>>>
185    }
186}
187
188impl<Head, Node> FromIterator<Node::Schema> for GhtInner<Head, Node>
189where
190    Head: 'static + Hash + Eq + Clone,
191    Node: 'static + GeneralizedHashTrieNode + Clone,
192    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
193{
194    fn from_iter<Iter: IntoIterator<Item = Node::Schema>>(iter: Iter) -> Self {
195        let mut out = Self::default();
196        for row in iter {
197            out.insert(row);
198        }
199        out
200    }
201}
202
203/// leaf node of a HashTrie
204#[derive(Debug, PartialEq, Eq, Clone)]
205pub struct GhtLeaf<Schema, ValType, Storage>
206where
207    Schema: Eq + Hash,
208    Storage: VariadicCollection<Schema = Schema>,
209{
210    pub(crate) elements: Storage,
211    pub(crate) forced: bool,
212    /// defines ValType for the parents, recursively
213    pub(crate) _suffix_schema: PhantomData<ValType>,
214}
215impl<Schema, ValType, Storage> Default for GhtLeaf<Schema, ValType, Storage>
216where
217    Schema: Eq + Hash,
218    Storage: VariadicCollection<Schema = Schema> + Default,
219{
220    fn default() -> Self {
221        let elements = Storage::default();
222        Self {
223            elements,
224            forced: false,
225            _suffix_schema: PhantomData,
226        }
227    }
228}
229
230impl<Schema, ValHead, ValRest, Storage> GeneralizedHashTrieNode
231    for GhtLeaf<Schema, var_type!(ValHead, ...ValRest), Storage>
232where
233    Schema: 'static
234        + Eq
235        + VariadicExt
236        + Hash
237        + Clone
238        + SplitBySuffix<var_type!(ValHead, ...ValRest)>
239        + PartialEqVariadic,
240    ValHead: Clone + Eq + Hash,
241    var_type!(ValHead, ...ValRest): Clone + Eq + Hash + PartialEqVariadic,
242    <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix: Eq + Hash + Clone,
243    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
244{
245    type Schema = Schema;
246    type SuffixSchema = var_type!(ValHead, ...ValRest);
247    type ValType = var_type!(ValHead, ...ValRest);
248    type KeyType = <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix;
249    type Head = ValHead;
250    type Storage = Storage;
251
252    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
253        let mut retval: Self = Default::default();
254        for i in input {
255            retval.insert(i);
256        }
257        retval
258    }
259
260    fn merge_node(&mut self, other: Self) -> bool {
261        let old_len = self.elements.len();
262        self.elements.extend(other.elements);
263        self.elements.len() > old_len
264    }
265
266    const HEIGHT: usize = 0;
267
268    fn insert(&mut self, row: Self::Schema) -> bool {
269        self.elements.insert(row);
270        true
271    }
272
273    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
274        self.elements.iter().any(|r| Schema::eq_ref(r, row))
275    }
276
277    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
278        self.elements.iter()
279    }
280
281    fn find_containing_leaf(
282        &self,
283        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
284    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
285    {
286        // TODO(mingwei): actually use the hash set as a hash set
287        if self
288            .elements
289            .iter()
290            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
291        {
292            Some(self)
293        } else {
294            None
295        }
296    }
297
298    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
299        Some(self.elements.into_iter())
300    }
301
302    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
303        Some(self.elements.drain())
304    }
305}
306
307impl<Schema, Storage> GeneralizedHashTrieNode for GhtLeaf<Schema, (), Storage>
308where
309    Schema: 'static + Eq + VariadicExt + Hash + Clone + PartialEqVariadic,
310    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
311{
312    type Schema = Schema;
313    type SuffixSchema = ();
314    type ValType = ();
315    type KeyType = Schema;
316    type Head = ();
317    type Storage = Storage;
318
319    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
320        let mut retval: Self = Default::default();
321        for i in input {
322            retval.insert(i);
323        }
324        retval
325    }
326
327    fn merge_node(&mut self, other: Self) -> bool {
328        let old_len = self.elements.len();
329        self.elements.extend(other.elements);
330        self.elements.len() > old_len
331    }
332
333    const HEIGHT: usize = 0;
334
335    fn insert(&mut self, row: Self::Schema) -> bool {
336        self.elements.insert(row);
337        true
338    }
339
340    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
341        self.elements.iter().any(|r| Schema::eq_ref(r, row))
342    }
343
344    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
345        self.elements.iter()
346    }
347
348    fn find_containing_leaf(
349        &self,
350        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
351    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
352    {
353        // TODO(mingwei): actually use the hash set as a hash set
354        if self
355            .elements
356            .iter()
357            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
358        {
359            Some(self)
360        } else {
361            None
362        }
363    }
364
365    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
366        Some(self.elements.into_iter())
367    }
368
369    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
370        Some(self.elements.drain())
371    }
372}
373
374impl<Schema, ValType, Storage> FromIterator<Schema> for GhtLeaf<Schema, ValType, Storage>
375where
376    Schema: Eq + Hash,
377    Storage: VariadicCollection<Schema = Schema> + Default + FromIterator<Schema>,
378{
379    fn from_iter<Iter: IntoIterator<Item = Schema>>(iter: Iter) -> Self {
380        let elements = iter.into_iter().collect();
381        Self {
382            elements,
383            forced: false,
384            _suffix_schema: PhantomData,
385        }
386    }
387}
388
389/// A trait for the get and iter methods from Wang/Willsey/Suciu, which
390/// work differently on leaves than internal nodes
391pub trait GhtGet: GeneralizedHashTrieNode {
392    /// Type returned by [`Self::get`].
393    type Get: GeneralizedHashTrieNode<Schema = Self::Schema>;
394
395    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
396    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
397    /// On a Leaf node, returns None.
398    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get>;
399
400    /// get, but mutable output
401    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get>;
402
403    /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes).
404    fn iter(&self) -> impl Iterator<Item = Self::Head>;
405
406    /// Iterator for the tuples (from leaf nodes) or nothing (from inner nodes).
407    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
408}
409
410impl<Head, Node> GhtGet for GhtInner<Head, Node>
411where
412    Head: 'static + Eq + Hash + Clone,
413    Node: 'static + GeneralizedHashTrieNode,
414    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
415{
416    /// Type returned by [`Self::get`].
417    type Get = Node;
418
419    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
420    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
421    /// On a Leaf node, returns None.
422    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get> {
423        self.children.get(head)
424    }
425
426    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get> {
427        self.children.get_mut(head)
428    }
429
430    fn iter(&self) -> impl Iterator<Item = Self::Head> {
431        #[expect(
432            clippy::disallowed_methods,
433            reason = "nondeterministic iteration order, TODO(mingwei)"
434        )]
435        self.children.keys().cloned()
436    }
437
438    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
439        std::iter::empty()
440    }
441}
442
443impl<Schema, ValType, Storage> GhtGet for GhtLeaf<Schema, ValType, Storage>
444where
445    Schema: 'static + Eq + Hash + Clone + PartialEqVariadic + SplitBySuffix<ValType>,
446    ValType: Eq + Hash + Clone + PartialEqVariadic,
447    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
448    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema>,
449    Storage: VariadicCollection<Schema = Schema>,
450{
451    /// Type returned by [`Self::get`].
452    type Get = GhtLeaf<Schema, ValType, Storage>;
453
454    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
455    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
456    /// On a Leaf node, returns None.
457    fn get<'a>(&'a self, _head: &Self::Head) -> Option<&'a Self::Get> {
458        None
459    }
460    fn get_mut<'a>(&'a mut self, _head: &Self::Head) -> Option<&'a mut Self::Get> {
461        None
462    }
463
464    fn iter(&self) -> impl Iterator<Item = Self::Head> {
465        std::iter::empty()
466    }
467
468    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
469        self.elements.iter()
470    }
471}
472
473/// A trait to iterate through the items in a Ght based on a prefix of the schema.
474pub trait GhtPrefixIter<KeyPrefix> {
475    /// the schema output
476    type Item: VariadicExt;
477    /// given a prefix, return an iterator through the items below
478    fn prefix_iter<'a>(
479        &'a self,
480        prefix: KeyPrefix,
481    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
482    where
483        Self::Item: 'a;
484}
485
486impl<'k, Head, Node, PrefixRest> GhtPrefixIter<var_type!(&'k Head, ...PrefixRest)>
487    for GhtInner<Head, Node>
488where
489    Head: Eq + Hash + Clone,
490    Node: GeneralizedHashTrieNode + GhtPrefixIter<PrefixRest>,
491{
492    type Item = <Node as GhtPrefixIter<PrefixRest>>::Item;
493    fn prefix_iter<'a>(
494        &'a self,
495        prefix: var_type!(&'k Head, ...PrefixRest),
496    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
497    where
498        Self::Item: 'a,
499    {
500        let var_args!(head, ...rest) = prefix;
501        self.children
502            .get(head)
503            .map(|node| node.prefix_iter(rest))
504            .into_iter()
505            .flatten()
506    }
507}
508impl<Head, Node> GhtPrefixIter<var_type!()> for GhtInner<Head, Node>
509where
510    Self: GeneralizedHashTrieNode,
511    Head: Eq + Hash + Clone,
512    Node: GeneralizedHashTrieNode,
513{
514    type Item = <Self as GeneralizedHashTrieNode>::Schema;
515    fn prefix_iter<'a>(
516        &'a self,
517        _prefix: var_type!(),
518    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
519    where
520        Self::Item: 'a,
521    {
522        self.recursive_iter()
523    }
524}
525
526impl<KeyPrefixRef, Schema, ValType, Storage> GhtPrefixIter<KeyPrefixRef>
527    for GhtLeaf<Schema, ValType, Storage>
528where
529    KeyPrefixRef: 'static + RefVariadic,
530    Schema: 'static + VariadicExt + Hash + Eq + SplitBySuffix<ValType>,
531    ValType: VariadicExt,
532    ValType: Split<KeyPrefixRef::UnRefVar>,
533    KeyPrefixRef::UnRefVar: PartialEqVariadic,
534    Storage: 'static + VariadicCollection<Schema = Schema>,
535{
536    type Item = Schema;
537    fn prefix_iter<'a>(
538        &'a self,
539        prefix: KeyPrefixRef,
540    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
541    where
542        Self::Item: 'a,
543    {
544        self.elements.iter().filter(move |&row| {
545            let (_row_prefix, row_mid_suffix) =
546                <Schema as SplitBySuffix<ValType>>::split_by_suffix_ref(row);
547            let (row_mid, _row_suffix): (<KeyPrefixRef::UnRefVar as VariadicExt>::AsRefVar<'_>, _) =
548                <ValType as Split<KeyPrefixRef::UnRefVar>>::split_ref(row_mid_suffix);
549            <KeyPrefixRef::UnRefVar as PartialEqVariadic>::eq_ref(prefix.unref_ref(), row_mid)
550        })
551    }
552}