lattices/ght/
colt.rs

1//! COLT from Wang/Willsey/Suciu
2
3use std::hash::Hash;
4
5use variadics::variadic_collections::VariadicCollection;
6use variadics::{PartialEqVariadic, SplitBySuffix, VariadicExt, var_expr, var_type};
7
8use crate::ght::{GeneralizedHashTrieNode, GhtGet, GhtInner, GhtLeaf};
9
10/// Data structure design for our COLT is unique.
11///
12/// In the paper, the COLT is an unbalanced trie that "grows upward" from leaves lazily
13/// on access via the `force` method.
14/// Unfortunately, unbalanced tries break our types: a node's type to be defined via the
15/// type of its children, recursively -- meaning all paths need to be the same type (and length)!
16///
17/// To work around this, our COLT is a variadic *list* GHTs (a forest) of increasing height,
18/// starting with a trie of height 0 and continuing until a trie of height |key| - 1.
19/// Our `force` method does not add a node above a leaf L as in the paper. Instead
20/// it `take`s L from the current trie and merges it into the next trie to the right which is 1 taller.
21//
22/// The following trait provides the behavior we need from the nodes in a COLT forest. Every
23/// `ColtForestNode` is a `GeneralizedHashTrieNode` with some extra methods.
24pub trait ColtForestNode: GeneralizedHashTrieNode {
25    /// result of `force`ing a node
26    type Force: GeneralizedHashTrieNode;
27
28    /// Force the generation of a parent node, as in the Wang/Willsey/Suciu COLT structure,
29    /// to be merged into the next trie to the right.
30    fn force(self) -> Option<Self::Force>;
31
32    /// Force the generation of a parent node but retain ref to this node
33    fn force_drain(&mut self) -> Option<Self::Force>;
34}
35
36// Force only acts on leaves
37impl<Head, Node> ColtForestNode for GhtInner<Head, Node>
38where
39    Head: 'static + Hash + Eq + Clone,
40    Node: 'static + ColtForestNode,
41    <Node as GeneralizedHashTrieNode>::Schema:
42        SplitBySuffix<var_type!(Head,  ...<Node as GeneralizedHashTrieNode>::SuffixSchema)>,
43{
44    type Force = Node; // where Node:GeneralizedHashTrieNode;
45    fn force(self) -> Option<Self::Force> {
46        None
47    }
48
49    fn force_drain(&mut self) -> Option<Self::Force> {
50        None
51    }
52}
53
54// Leaf case
55impl<Schema, Head, Rest, Storage> ColtForestNode
56    for GhtLeaf<Schema, var_type!(Head, ...Rest), Storage>
57where
58    Head: 'static + Clone + Hash + Eq,
59    Rest: 'static + Clone + Hash + Eq + VariadicExt,
60    Schema: 'static + Hash + Eq + Clone + VariadicExt + PartialEqVariadic,
61    Rest: PartialEqVariadic,
62    Schema: SplitBySuffix<var_type!(Head, ...Rest)>,
63    Schema: SplitBySuffix<Rest>,
64    <Schema as SplitBySuffix<(Head, Rest)>>::Prefix: Eq + Hash + Clone,
65    <Schema as SplitBySuffix<Rest>>::Prefix: Eq + Hash + Clone,
66    Storage: VariadicCollection<Schema = Schema> + Default + IntoIterator<Item = Schema>,
67    GhtLeaf<Schema, Rest, Storage>: GeneralizedHashTrieNode<Schema = Schema, Storage = Storage>,
68    GhtInner<Head, GhtLeaf<Schema, Rest, Storage>>:
69        GeneralizedHashTrieNode<Schema = Schema, Storage = Storage>,
70{
71    type Force = GhtInner<Head, GhtLeaf<Schema, Rest, Storage>>;
72    fn force(mut self) -> Option<Self::Force> {
73        let mut retval = Self::Force::default();
74        self.forced = true;
75        for row in self.into_iter().unwrap() {
76            retval.insert(row);
77        }
78        Some(retval)
79    }
80
81    fn force_drain(&mut self) -> Option<GhtInner<Head, GhtLeaf<Schema, Rest, Storage>>> {
82        let mut retval = Self::Force::default();
83        self.forced = true;
84        for row in self.elements.drain() {
85            retval.insert(row);
86        }
87        Some(retval)
88    }
89}
90
91/// Emulate the `get` and iter` functions for a single Ght node
92/// [`GhtGet`] across a forest of ColtForestNodes.
93///
94/// The "current" ColtGet node (corresponding to the "current" GhtGet node) at depth
95/// d from the root is a variadic list of nodes, each at depth d in its their
96/// respective trie in the forest, Tries of height d or smaller are omitted,
97/// hence the first element in any ColtGet is a GhtLeaf.
98pub trait ColtGet {
99    /// Schema variadic: the schema of the relation stored in this COLT.
100    /// This type is the same in all Tries and nodes of the COLT.
101    type Schema: VariadicExt + Eq + Hash + Clone;
102    /// The type of Storage
103    /// This type is the same in all Tries and nodes of the COLT
104    type Storage: VariadicCollection;
105    /// SuffixSchema variadic: the suffix of the schema *from this node of the trie
106    /// downward*. The first entry in this variadic is of type Head.
107    /// This type is the same in all Tries of the COLT (but changes as we traverse downward)
108    type SuffixSchema: VariadicExt + Eq + Hash + Clone;
109    /// The type of the first column in the SuffixSchema
110    /// This type is the same in all Tries of the COLT (but changes as we traverse downward)
111    type Head: Eq + Hash;
112
113    /// Type returned by [`Self::get`].
114    type Get;
115
116    /// Following the spec in Wang/Willsey/Suciu, on an Inner node this retrieves the value
117    /// (child) associated with the given "head" key. It returns an `Option` containing a
118    /// reference to the value if found, or `None` if not found.
119    /// On a Leaf node, returns None.
120    fn get(self, head: &Self::Head) -> Self::Get;
121
122    /// Iterator for the "head" keys (from inner nodes) or nothing (from leaf nodes).
123    fn iter(&self) -> impl Iterator<Item = Self::Head>;
124}
125
126/// `ColtGet` without the first (head) trie.
127pub trait ColtGetTail<InnerToMerge>: ColtGet {
128    /// merge an inner node into the head of this tail of the forest
129    fn merge(&mut self, inner_to_merge: InnerToMerge);
130}
131
132impl<'a, Rest, Schema, SuffixSchema, Storage> ColtGet for var_type!(&'a mut GhtLeaf<Schema, SuffixSchema, Storage>, ...Rest)
133where
134    Rest: ColtGetTail<
135            <GhtLeaf<Schema, SuffixSchema, Storage> as ColtForestNode>::Force,
136            Storage = Storage,
137        >,
138    <Rest as ColtGet>::SuffixSchema: 'a,
139    GhtLeaf<Schema, SuffixSchema, Storage>: ColtForestNode,
140    Schema: Clone + Hash + Eq + VariadicExt,
141    SuffixSchema: Clone + Hash + Eq + VariadicExt,
142    Storage: VariadicCollection<Schema = Schema>,
143{
144    type Schema = Schema;
145    type Head = Rest::Head;
146    type SuffixSchema = SuffixSchema;
147    type Get = Rest::Get;
148    type Storage = Rest::Storage;
149
150    fn get(self, head: &Self::Head) -> Self::Get {
151        let (first, mut rest) = self;
152        let forced = first.force_drain().unwrap();
153        ColtGetTail::merge(&mut rest, forced);
154        Rest::get(rest, head)
155    }
156
157    fn iter(&self) -> impl Iterator<Item = Self::Head> {
158        std::iter::empty()
159    }
160}
161
162// we only merge in GhtInner<Head, GhtLeaf<_>> nodes, so this
163// should never be called.
164impl<'a, Rest, Schema, SuffixSchema, T, Storage> ColtGetTail<T> for var_type!(&'a mut GhtLeaf<Schema, SuffixSchema, Storage>, ...Rest)
165where
166    Rest: ColtGetTail<
167            <GhtLeaf<Schema, SuffixSchema, Storage> as ColtForestNode>::Force,
168            Storage = Storage,
169        >,
170    <Rest as ColtGet>::SuffixSchema: 'a,
171    GhtLeaf<Schema, SuffixSchema, Storage>: ColtForestNode,
172    Schema: Clone + Hash + Eq + VariadicExt,
173    SuffixSchema: Clone + Hash + Eq + VariadicExt,
174    Storage: VariadicCollection<Schema = Schema>,
175{
176    fn merge(&mut self, _inner_to_merge: T) {
177        panic!();
178    }
179}
180
181impl<'a, Head, Head2, Rest, Node> ColtGet for var_type!(&'a mut GhtInner<Head, GhtInner<Head2, Node>>, ...Rest)
182where
183    Rest: ColtGet<Head = Head>,
184    Head: Eq + Hash + Clone,
185    Head2: Eq + Hash + Clone,
186    Node: GeneralizedHashTrieNode,
187    GhtInner<Head, GhtInner<Head2, Node>>: GeneralizedHashTrieNode<
188            Head = Rest::Head,
189            SuffixSchema = Rest::SuffixSchema,
190            Schema = Rest::Schema,
191            Storage = Rest::Storage,
192        >,
193    GhtInner<Head2, Node>: GeneralizedHashTrieNode<Schema = Rest::Schema, Storage = Rest::Storage>,
194{
195    type Schema = Rest::Schema;
196    type Head = Rest::Head;
197    type SuffixSchema = Rest::SuffixSchema;
198    type Get = var_type!(&'a mut GhtInner<Head2, Node>, ...Rest::Get);
199    type Storage = Rest::Storage;
200
201    fn get(self, head: &Self::Head) -> Self::Get {
202        let (first, rest) = self;
203        // create a child entry here for this get, to absorb future forces
204        // TODO(mingwei): extra clone here if entry already exists.
205        let child = first.children.entry(head.clone()).or_default();
206        var_expr!(child, ...Rest::get(rest, head))
207    }
208
209    fn iter(&self) -> impl Iterator<Item = Self::Head> {
210        self.0.children.keys().cloned().chain(Rest::iter(&self.1))
211    }
212}
213
214impl<'a, Head, Rest, Schema, ValType, Storage> ColtGet for var_type!(&'a mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>, ...Rest)
215where
216    Rest: ColtGet<Head = Head>,
217    Head: Eq + Hash + Clone,
218    Schema: Eq + Hash + Clone + PartialEqVariadic,
219    ValType: Eq + Hash + Clone + PartialEqVariadic,
220    Storage: VariadicCollection<Schema = Schema>,
221    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode,
222    Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix<ValType> + PartialEqVariadic,
223    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
224    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
225        GeneralizedHashTrieNode<Head = Head> + GhtGet,
226    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
227        GeneralizedHashTrieNode<Head = Rest::Head, Schema = Rest::Schema, Storage = Rest::Storage>,
228    GhtLeaf<Schema, ValType, Storage>:
229        GeneralizedHashTrieNode<Schema = Rest::Schema, Storage = Rest::Storage> + GhtGet,
230{
231    type Schema = Rest::Schema;
232    type Head = Rest::Head;
233    type SuffixSchema = Rest::SuffixSchema;
234    type Get = var_type!(&'a mut GhtLeaf<Schema, ValType, Storage>, ...Rest::Get);
235    type Storage = Rest::Storage;
236
237    fn get(self, head: &Self::Head) -> Self::Get {
238        let (first, rest) = self;
239        let child = first.children.entry(head.clone()).or_default();
240        var_expr!(child, ...Rest::get(rest, head))
241    }
242
243    fn iter(&self) -> impl Iterator<Item = Self::Head> {
244        self.0.children.keys().cloned().chain(Rest::iter(&self.1))
245    }
246}
247
248impl<'a, Head, Rest, Schema, ValType, Storage>
249    ColtGetTail<GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>> for var_type!(&'a mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>, ...Rest)
250where
251    Rest: ColtGet<Head = Head, Schema = Schema, Storage = Storage>,
252    Head: Eq + Hash + Clone,
253    Schema: Eq + Hash + Clone + PartialEqVariadic,
254    ValType: Eq + Hash + Clone + PartialEqVariadic,
255    Storage: VariadicCollection<Schema = Schema>,
256    var_type!(&'a mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>, ...Rest):
257        ColtGet<Head = Head, Schema = Schema, Storage = Storage>,
258    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema>,
259    Schema: 'static + Eq + VariadicExt + Hash + Clone + SplitBySuffix<ValType> + PartialEqVariadic,
260    <Schema as SplitBySuffix<ValType>>::Prefix: Eq + Hash + Clone,
261    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
262        GeneralizedHashTrieNode<Head = Head, Schema = Schema, Storage = Storage> + GhtGet,
263{
264    fn merge(&mut self, inner_to_merge: GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>) {
265        let (head, _rest) = self;
266        // can't use Merge with COLT bc columnstore is not a lattice!!
267        head.merge_node(inner_to_merge);
268    }
269}
270
271impl<'a, Head, Node> ColtGet for var_type!(&'a mut GhtInner<Head, Node>)
272where
273    GhtInner<Head, Node>: GeneralizedHashTrieNode,
274    Head: Clone + Eq + Hash,
275    Node: GeneralizedHashTrieNode,
276{
277    type Schema = <GhtInner<Head, Node> as GeneralizedHashTrieNode>::Schema;
278    type SuffixSchema = <GhtInner<Head, Node> as GeneralizedHashTrieNode>::SuffixSchema;
279    type Head = Head;
280    type Get = var_type!(&'a mut Node);
281    type Storage = Node::Storage;
282
283    fn get(self, head: &Self::Head) -> Self::Get {
284        let child = self.0.children.entry(head.clone()).or_default();
285        var_expr!(child)
286    }
287
288    fn iter(&self) -> impl Iterator<Item = Self::Head> {
289        self.0.children.keys().cloned()
290    }
291}
292impl<Head, Schema, ValType, Storage> ColtGetTail<GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>> for var_type!(&mut GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>)
293where
294    GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>:
295        GeneralizedHashTrieNode<Head = Head> + GhtGet,
296    GhtLeaf<Schema, ValType, Storage>: GeneralizedHashTrieNode<Schema = Schema, Storage = Storage>,
297    Head: Clone + Eq + Hash,
298    Schema: Clone + Eq + Hash + VariadicExt,
299    Storage: VariadicCollection<Schema = Schema>,
300{
301    fn merge(&mut self, inner_to_merge: GhtInner<Head, GhtLeaf<Schema, ValType, Storage>>) {
302        let (head, _rest) = self;
303        // can't use Merge with COLT bc columnstore is not a lattice!!
304        head.merge_node(inner_to_merge);
305    }
306}