lattices/ght/
mod.rs

1//! GHT from the Wang/Willsey/Suciu Freejoin work
2use std::collections::HashMap;
3use std::fmt::Debug;
4use std::hash::Hash;
5use std::marker::PhantomData;
6
7use variadics::variadic_collections::VariadicCollection;
8use variadics::{
9    PartialEqVariadic, RefVariadic, Split, SplitBySuffix, VariadicExt, var_args, var_type,
10};
11
12pub mod colt;
13pub mod lattice;
14pub mod macros;
15pub mod test;
16
17/// The GeneralizedHashTrieNode trait captures the properties of nodes in a Ght.
18///
19/// The Ght, defined by Wang/Willsey/Suciu, is a hash-based trie for storing tuples.
20/// It is parameterized by an ordered schema [`VariadicExt`] of the relation stored in the trie.
21/// It is a tree of [`GhtInner`] nodes, with [`GhtLeaf`] nodes at the leaves.
22/// The trie is keyed on a prefix of the schema [`Self::KeyType`],
23/// and the remaining columns [`Self::ValType`] are stored in the leaf.
24/// All leaf nodes use the same `[Self::Storage]` type to store the data.
25pub trait GeneralizedHashTrieNode: Default {
26    // types that are the same in all nodes of the trie
27    /// Schema variadic: the schema of the relation stored in this trie.
28    type Schema: VariadicExt + Eq + Hash + Clone + SplitBySuffix<Self::SuffixSchema>;
29    /// The prefix of columns in [`Self::Schema`] that the trie is keyed on
30    type KeyType: VariadicExt + Eq + Hash + Clone;
31    /// The suffix of columns in [`Self::Schema`] that are not part of the trie keys
32    type ValType: VariadicExt + Eq + Hash + Clone;
33    /// The type that holds the data in the leaves
34    type Storage: VariadicCollection<Schema = Self::Schema>
35        + Default
36        + IntoIterator<Item = Self::Schema>;
37
38    // types that vary per node
39    /// SuffixSchema variadic: the suffix of [`Self::Schema`] from this node of the trie
40    /// downward. The first entry in this variadic is of type [`Self::Head`].
41    type SuffixSchema: VariadicExt + Eq + Hash + Clone;
42    /// The first field in [`Self::SuffixSchema`], and the key for the next node in the trie.
43    type Head: Eq + Hash + Clone;
44
45    /// Create a new Ght from the iterator.
46    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self;
47
48    /// Merge a matching Ght node into this node
49    fn merge_node(&mut self, other: Self) -> bool;
50
51    /// Report the height of this node. This is the length of path from this node to a leaf - 1.
52    /// E.g. if we have GhtInner<GhtInner<GhtLeaf...>> the height is 2
53    /// This is a static property of the type of this node, so simply invokes the static method.
54    fn height(&self) -> usize {
55        Self::HEIGHT
56    }
57
58    /// The height of this node in the GhT. Leaf = 0.
59    const HEIGHT: usize;
60
61    /// Inserts an item into the hash trie.
62    fn insert(&mut self, row: Self::Schema) -> bool;
63
64    /// Returns `true` if the (entire) row is found below in the trie, `false` otherwise.
65    /// See [`GhtGet::get`] to look just for "head" keys in this node
66    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool;
67
68    /// Iterate through (entire) rows stored in this HashTrie.
69    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
70
71    /// return the leaf below that contains this row, or `None` if not found.
72    fn find_containing_leaf(
73        &self,
74        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
75    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>>;
76
77    /// into_iter for leaf elements, or None for inner nodes
78    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>>;
79
80    /// pull all the data out of this trie node but retain the reference
81    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>>;
82}
83
84/// internal node of a HashTrie
85#[derive(Debug, Clone)]
86pub struct GhtInner<Head, Node>
87where
88    Head: Clone,
89    Node: GeneralizedHashTrieNode,
90{
91    pub(crate) children: HashMap<Head, Node>,
92}
93
94impl<Head, Node: GeneralizedHashTrieNode> Default for GhtInner<Head, Node>
95where
96    Head: Clone,
97    Node: GeneralizedHashTrieNode,
98{
99    fn default() -> Self {
100        let children = Default::default();
101        Self { children }
102    }
103}
104
105impl<Head, Node> GeneralizedHashTrieNode for GhtInner<Head, Node>
106where
107    Head: 'static + Hash + Eq + Clone,
108    Node: 'static + GeneralizedHashTrieNode,
109    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
110{
111    type Schema = Node::Schema;
112    type KeyType = Node::KeyType;
113    type ValType = Node::ValType;
114    type Storage = Node::Storage;
115    type SuffixSchema = var_type!(Head, ...Node::SuffixSchema);
116    type Head = Head;
117
118    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
119        let mut retval: Self = Default::default();
120        for row in input {
121            retval.insert(row);
122        }
123        retval
124    }
125
126    fn merge_node(&mut self, other: Self) -> bool {
127        let mut changed = false;
128
129        for (k, v) in other.children {
130            match self.children.entry(k) {
131                std::collections::hash_map::Entry::Occupied(mut occupied) => {
132                    changed |= occupied.get_mut().merge_node(v)
133                }
134                std::collections::hash_map::Entry::Vacant(vacant) => {
135                    vacant.insert(v);
136                    changed = true
137                }
138            }
139        }
140        changed
141    }
142
143    const HEIGHT: usize = Node::HEIGHT + 1;
144
145    fn insert(&mut self, row: Self::Schema) -> bool {
146        let (_prefix, var_args!(head, ..._rest)) =
147            Self::Schema::split_by_suffix_ref(row.as_ref_var());
148        self.children.entry(head.clone()).or_default().insert(row)
149    }
150
151    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
152        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
153        if let Some(node) = self.children.get(head) {
154            node.contains(row)
155        } else {
156            false
157        }
158    }
159
160    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
161        self.children
162            .iter()
163            .flat_map(|(_k, vs)| vs.recursive_iter())
164    }
165
166    fn find_containing_leaf(
167        &self,
168        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
169    ) -> Option<&'_ GhtLeaf<Self::Schema, Self::ValType, Self::Storage>> {
170        let (_prefix, var_args!(head, ..._rest)) = Self::Schema::split_by_suffix_ref(row);
171        self.children
172            .get(head)
173            .and_then(|child| child.find_containing_leaf(row))
174    }
175
176    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
177        None::<Box<dyn Iterator<Item = Self::Schema>>>
178    }
179
180    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
181        None::<Box<dyn Iterator<Item = Self::Schema>>>
182    }
183}
184
185impl<Head, Node> FromIterator<Node::Schema> for GhtInner<Head, Node>
186where
187    Head: 'static + Hash + Eq + Clone,
188    Node: 'static + GeneralizedHashTrieNode + Clone,
189    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
190{
191    fn from_iter<Iter: IntoIterator<Item = Node::Schema>>(iter: Iter) -> Self {
192        let mut out = Self::default();
193        for row in iter {
194            out.insert(row);
195        }
196        out
197    }
198}
199
200/// leaf node of a HashTrie
201#[derive(Debug, PartialEq, Eq, Clone)]
202pub struct GhtLeaf<Schema, ValType, Storage>
203where
204    Schema: Eq + Hash,
205    Storage: VariadicCollection<Schema = Schema>,
206{
207    pub(crate) elements: Storage,
208    pub(crate) forced: bool,
209    /// defines ValType for the parents, recursively
210    pub(crate) _suffix_schema: PhantomData<ValType>,
211}
212impl<Schema, ValType, Storage> Default for GhtLeaf<Schema, ValType, Storage>
213where
214    Schema: Eq + Hash,
215    Storage: VariadicCollection<Schema = Schema> + Default,
216{
217    fn default() -> Self {
218        let elements = Storage::default();
219        Self {
220            elements,
221            forced: false,
222            _suffix_schema: PhantomData,
223        }
224    }
225}
226
227impl<Schema, ValHead, ValRest, Storage> GeneralizedHashTrieNode
228    for GhtLeaf<Schema, var_type!(ValHead, ...ValRest), Storage>
229where
230    Schema: 'static
231        + Eq
232        + VariadicExt
233        + Hash
234        + Clone
235        + SplitBySuffix<var_type!(ValHead, ...ValRest)>
236        + PartialEqVariadic,
237    ValHead: Clone + Eq + Hash,
238    var_type!(ValHead, ...ValRest): Clone + Eq + Hash + PartialEqVariadic,
239    <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix: Eq + Hash + Clone,
240    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
241{
242    type Schema = Schema;
243    type SuffixSchema = var_type!(ValHead, ...ValRest);
244    type ValType = var_type!(ValHead, ...ValRest);
245    type KeyType = <Schema as SplitBySuffix<var_type!(ValHead, ...ValRest)>>::Prefix;
246    type Head = ValHead;
247    type Storage = Storage;
248
249    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
250        let mut retval: Self = Default::default();
251        for i in input {
252            retval.insert(i);
253        }
254        retval
255    }
256
257    fn merge_node(&mut self, other: Self) -> bool {
258        let old_len = self.elements.len();
259        self.elements.extend(other.elements);
260        self.elements.len() > old_len
261    }
262
263    const HEIGHT: usize = 0;
264
265    fn insert(&mut self, row: Self::Schema) -> bool {
266        self.elements.insert(row);
267        true
268    }
269
270    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
271        self.elements.iter().any(|r| Schema::eq_ref(r, row))
272    }
273
274    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
275        self.elements.iter()
276    }
277
278    fn find_containing_leaf(
279        &self,
280        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
281    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
282    {
283        // TODO(mingwei): actually use the hash set as a hash set
284        if self
285            .elements
286            .iter()
287            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
288        {
289            Some(self)
290        } else {
291            None
292        }
293    }
294
295    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
296        Some(self.elements.into_iter())
297    }
298
299    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
300        Some(self.elements.drain())
301    }
302}
303
304impl<Schema, Storage> GeneralizedHashTrieNode for GhtLeaf<Schema, (), Storage>
305where
306    Schema: 'static + Eq + VariadicExt + Hash + Clone + PartialEqVariadic,
307    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
308{
309    type Schema = Schema;
310    type SuffixSchema = ();
311    type ValType = ();
312    type KeyType = Schema;
313    type Head = ();
314    type Storage = Storage;
315
316    fn new_from(input: impl IntoIterator<Item = Self::Schema>) -> Self {
317        let mut retval: Self = Default::default();
318        for i in input {
319            retval.insert(i);
320        }
321        retval
322    }
323
324    fn merge_node(&mut self, other: Self) -> bool {
325        let old_len = self.elements.len();
326        self.elements.extend(other.elements);
327        self.elements.len() > old_len
328    }
329
330    const HEIGHT: usize = 0;
331
332    fn insert(&mut self, row: Self::Schema) -> bool {
333        self.elements.insert(row);
334        true
335    }
336
337    fn contains<'a>(&'a self, row: <Self::Schema as VariadicExt>::AsRefVar<'a>) -> bool {
338        self.elements.iter().any(|r| Schema::eq_ref(r, row))
339    }
340
341    fn recursive_iter(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
342        self.elements.iter()
343    }
344
345    fn find_containing_leaf(
346        &self,
347        row: <Self::Schema as VariadicExt>::AsRefVar<'_>,
348    ) -> Option<&'_ GhtLeaf<<Self as GeneralizedHashTrieNode>::Schema, Self::ValType, Self::Storage>>
349    {
350        // TODO(mingwei): actually use the hash set as a hash set
351        if self
352            .elements
353            .iter()
354            .any(|x| <Schema as PartialEqVariadic>::eq_ref(row, x))
355        {
356            Some(self)
357        } else {
358            None
359        }
360    }
361
362    fn into_iter(self) -> Option<impl Iterator<Item = Self::Schema>> {
363        Some(self.elements.into_iter())
364    }
365
366    fn drain(&mut self) -> Option<impl Iterator<Item = Self::Schema>> {
367        Some(self.elements.drain())
368    }
369}
370
371impl<Schema, ValType, Storage> FromIterator<Schema> for GhtLeaf<Schema, ValType, Storage>
372where
373    Schema: Eq + Hash,
374    Storage: VariadicCollection<Schema = Schema> + Default + FromIterator<Schema>,
375{
376    fn from_iter<Iter: IntoIterator<Item = Schema>>(iter: Iter) -> Self {
377        let elements = iter.into_iter().collect();
378        Self {
379            elements,
380            forced: false,
381            _suffix_schema: PhantomData,
382        }
383    }
384}
385
386/// A trait for the get and iter methods from Wang/Willsey/Suciu, which
387/// work differently on leaves than internal nodes
388pub trait GhtGet: GeneralizedHashTrieNode {
389    /// Type returned by [`Self::get`].
390    type Get: GeneralizedHashTrieNode<Schema = Self::Schema>;
391
392    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
393    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
394    /// On a Leaf node, returns None.
395    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get>;
396
397    /// get, but mutable output
398    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get>;
399
400    /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes).
401    fn iter(&self) -> impl Iterator<Item = Self::Head>;
402
403    /// Iterator for the tuples (from leaf nodes) or nothing (from inner nodes).
404    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>>;
405}
406
407impl<Head, Node> GhtGet for GhtInner<Head, Node>
408where
409    Head: 'static + Eq + Hash + Clone,
410    Node: 'static + GeneralizedHashTrieNode,
411    Node::Schema: SplitBySuffix<var_type!(Head, ...Node::SuffixSchema)>,
412{
413    /// Type returned by [`Self::get`].
414    type Get = Node;
415
416    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
417    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
418    /// On a Leaf node, returns None.
419    fn get<'a>(&'a self, head: &Self::Head) -> Option<&'a Self::Get> {
420        self.children.get(head)
421    }
422
423    fn get_mut<'a>(&'a mut self, head: &Self::Head) -> Option<&'a mut Self::Get> {
424        self.children.get_mut(head)
425    }
426
427    fn iter(&self) -> impl Iterator<Item = Self::Head> {
428        self.children.keys().cloned()
429    }
430
431    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
432        std::iter::empty()
433    }
434}
435
436impl<Schema, ValType, Storage> GhtGet for GhtLeaf<Schema, ValType, Storage>
437where
438    Schema: 'static + Eq + Hash + Clone + PartialEqVariadic + SplitBySuffix<ValType>,
439    ValType: Eq + Hash + Clone + PartialEqVariadic,
440    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
441    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema>,
442    Storage: VariadicCollection<Schema = Schema>,
443{
444    /// Type returned by [`Self::get`].
445    type Get = GhtLeaf<Schema, ValType, Storage>;
446
447    /// On an Inner node, retrieves the value (child) associated with the given "head" key.
448    /// returns an `Option` containing a reference to the value if found, or `None` if not found.
449    /// On a Leaf node, returns None.
450    fn get<'a>(&'a self, _head: &Self::Head) -> Option<&'a Self::Get> {
451        None
452    }
453    fn get_mut<'a>(&'a mut self, _head: &Self::Head) -> Option<&'a mut Self::Get> {
454        None
455    }
456
457    fn iter(&self) -> impl Iterator<Item = Self::Head> {
458        std::iter::empty()
459    }
460
461    fn iter_tuples(&self) -> impl Iterator<Item = <Self::Schema as VariadicExt>::AsRefVar<'_>> {
462        self.elements.iter()
463    }
464}
465
466/// A trait to iterate through the items in a Ght based on a prefix of the schema.
467pub trait GhtPrefixIter<KeyPrefix> {
468    /// the schema output
469    type Item: VariadicExt;
470    /// given a prefix, return an iterator through the items below
471    fn prefix_iter<'a>(
472        &'a self,
473        prefix: KeyPrefix,
474    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
475    where
476        Self::Item: 'a;
477}
478
479impl<'k, Head, Node, PrefixRest> GhtPrefixIter<var_type!(&'k Head, ...PrefixRest)>
480    for GhtInner<Head, Node>
481where
482    Head: Eq + Hash + Clone,
483    Node: GeneralizedHashTrieNode + GhtPrefixIter<PrefixRest>,
484{
485    type Item = <Node as GhtPrefixIter<PrefixRest>>::Item;
486    fn prefix_iter<'a>(
487        &'a self,
488        prefix: var_type!(&'k Head, ...PrefixRest),
489    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
490    where
491        Self::Item: 'a,
492    {
493        let var_args!(head, ...rest) = prefix;
494        self.children
495            .get(head)
496            .map(|node| node.prefix_iter(rest))
497            .into_iter()
498            .flatten()
499    }
500}
501impl<Head, Node> GhtPrefixIter<var_type!()> for GhtInner<Head, Node>
502where
503    Self: GeneralizedHashTrieNode,
504    Head: Eq + Hash + Clone,
505    Node: GeneralizedHashTrieNode,
506{
507    type Item = <Self as GeneralizedHashTrieNode>::Schema;
508    fn prefix_iter<'a>(
509        &'a self,
510        _prefix: var_type!(),
511    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
512    where
513        Self::Item: 'a,
514    {
515        self.recursive_iter()
516    }
517}
518
519impl<KeyPrefixRef, Schema, ValType, Storage> GhtPrefixIter<KeyPrefixRef>
520    for GhtLeaf<Schema, ValType, Storage>
521where
522    KeyPrefixRef: 'static + RefVariadic,
523    Schema: 'static + VariadicExt + Hash + Eq + SplitBySuffix<ValType>,
524    ValType: VariadicExt,
525    ValType: Split<KeyPrefixRef::UnRefVar>,
526    KeyPrefixRef::UnRefVar: PartialEqVariadic,
527    Storage: 'static + VariadicCollection<Schema = Schema>,
528{
529    type Item = Schema;
530    fn prefix_iter<'a>(
531        &'a self,
532        prefix: KeyPrefixRef,
533    ) -> impl Iterator<Item = <Self::Item as VariadicExt>::AsRefVar<'a>>
534    where
535        Self::Item: 'a,
536    {
537        self.elements.iter().filter(move |&row| {
538            let (_row_prefix, row_mid_suffix) =
539                <Schema as SplitBySuffix<ValType>>::split_by_suffix_ref(row);
540            let (row_mid, _row_suffix): (<KeyPrefixRef::UnRefVar as VariadicExt>::AsRefVar<'_>, _) =
541                <ValType as Split<KeyPrefixRef::UnRefVar>>::split_ref(row_mid_suffix);
542            <KeyPrefixRef::UnRefVar as PartialEqVariadic>::eq_ref(prefix.unref_ref(), row_mid)
543        })
544    }
545}