dfir_lang/graph/ops/
join.rs

1use quote::{ToTokens, quote_spanned};
2use syn::parse_quote;
3
4use super::{
5    OpInstGenerics, OperatorCategory, OperatorConstraints, OperatorInstance, OperatorWriteOutput,
6    Persistence, RANGE_1, WriteContextArgs,
7};
8
9/// > 2 input streams of type `<(K, V1)>` and `<(K, V2)>`, 1 output stream of type `<(K, (V1, V2))>`
10///
11/// Forms the equijoin of the tuples in the input streams by their first (key) attribute. Note that the result nests the 2nd input field (values) into a tuple in the 2nd output field.
12///
13/// ```dfir
14/// source_iter(vec![("hello", "world"), ("stay", "gold"), ("hello", "world")]) -> [0]my_join;
15/// source_iter(vec![("hello", "cleveland")]) -> [1]my_join;
16/// my_join = join()
17///     -> assert_eq([("hello", ("world", "cleveland"))]);
18/// ```
19///
20/// `join` can also be provided with one or two generic lifetime persistence arguments, either
21/// `'tick` or `'static`, to specify how join data persists. With `'tick`, pairs will only be
22/// joined with corresponding pairs within the same tick. With `'static`, pairs will be remembered
23/// across ticks and will be joined with pairs arriving in later ticks. When not explicitly
24/// specified persistence defaults to `tick.
25///
26/// When two persistence arguments are supplied the first maps to port `0` and the second maps to
27/// port `1`.
28/// When a single persistence argument is supplied, it is applied to both input ports.
29/// When no persistence arguments are applied it defaults to `'tick` for both.
30///
31/// The syntax is as follows:
32/// ```dfir,ignore
33/// join(); // Or
34/// join::<'static>();
35///
36/// join::<'tick>();
37///
38/// join::<'static, 'tick>();
39///
40/// join::<'tick, 'static>();
41/// // etc.
42/// ```
43///
44/// `join` is defined to treat its inputs as *sets*, meaning that it
45/// eliminates duplicated values in its inputs. If you do not want
46/// duplicates eliminated, use the [`join_multiset`](#join_multiset) operator.
47///
48/// ### Examples
49///
50/// ```rustbook
51/// let (input_send, input_recv) = dfir_rs::util::unbounded_channel::<(&str, &str)>();
52/// let mut flow = dfir_rs::dfir_syntax! {
53///     source_iter([("hello", "world")]) -> [0]my_join;
54///     source_stream(input_recv) -> [1]my_join;
55///     my_join = join::<'tick>() -> for_each(|(k, (v1, v2))| println!("({}, ({}, {}))", k, v1, v2));
56/// };
57/// input_send.send(("hello", "oakland")).unwrap();
58/// flow.run_tick();
59/// input_send.send(("hello", "san francisco")).unwrap();
60/// flow.run_tick();
61/// ```
62/// Prints out `"(hello, (world, oakland))"` since `source_iter([("hello", "world")])` is only
63/// included in the first tick, then forgotten.
64///
65/// ---
66///
67/// ```rustbook
68/// let (input_send, input_recv) = dfir_rs::util::unbounded_channel::<(&str, &str)>();
69/// let mut flow = dfir_rs::dfir_syntax! {
70///     source_iter([("hello", "world")]) -> [0]my_join;
71///     source_stream(input_recv) -> [1]my_join;
72///     my_join = join::<'static>() -> for_each(|(k, (v1, v2))| println!("({}, ({}, {}))", k, v1, v2));
73/// };
74/// input_send.send(("hello", "oakland")).unwrap();
75/// flow.run_tick();
76/// input_send.send(("hello", "san francisco")).unwrap();
77/// flow.run_tick();
78/// ```
79/// Prints out `"(hello, (world, oakland))"` and `"(hello, (world, san francisco))"` since the
80/// inputs are peristed across ticks.
81pub const JOIN: OperatorConstraints = OperatorConstraints {
82    name: "join",
83    categories: &[OperatorCategory::MultiIn],
84    hard_range_inn: &(2..=2),
85    soft_range_inn: &(2..=2),
86    hard_range_out: RANGE_1,
87    soft_range_out: RANGE_1,
88    num_args: 0,
89    persistence_args: &(0..=2),
90    type_args: &(0..=1),
91    is_external_input: false,
92    has_singleton_output: false,
93    flo_type: None,
94    ports_inn: Some(|| super::PortListSpec::Fixed(parse_quote! { 0, 1 })),
95    ports_out: None,
96    input_delaytype_fn: |_| None,
97    write_fn: |wc @ &WriteContextArgs {
98                   root,
99                   context,
100                   df_ident,
101                   loop_id,
102                   op_span,
103                   ident,
104                   inputs,
105                   work_fn,
106                   op_inst:
107                       OperatorInstance {
108                           generics:
109                               OpInstGenerics {
110                                   persistence_args,
111                                   type_args,
112                                   ..
113                               },
114                           ..
115                       },
116                   ..
117               },
118               _diagnostics| {
119        let join_type =
120            type_args
121                .first()
122                .map(ToTokens::to_token_stream)
123                .unwrap_or(quote_spanned!(op_span=>
124                    #root::compiled::pull::HalfSetJoinState
125                ));
126
127        // TODO: This is really bad.
128        // This will break if the user aliases HalfSetJoinState to something else. Temporary hacky solution.
129        // Note that cross_join() depends on the implementation here as well.
130        let additional_trait_bounds = if join_type.to_string().contains("HalfSetJoinState") {
131            quote_spanned!(op_span=>
132                + ::std::cmp::Eq
133            )
134        } else {
135            quote_spanned!(op_span=>)
136        };
137
138        let make_joindata = |persistence, side| {
139            let joindata_ident = wc.make_ident(format!("joindata_{}", side));
140            let borrow_ident = wc.make_ident(format!("joindata_{}_borrow", side));
141
142            let lifespan = wc.persistence_as_state_lifespan(persistence);
143            let reset = lifespan.map(|lifespan| quote_spanned! {op_span=>
144                #df_ident.set_state_lifespan_hook(#joindata_ident, #lifespan, |rcell| (#work_fn)(|| #root::util::clear::Clear::clear(::std::cell::RefCell::get_mut(rcell))));
145            }).unwrap_or_default();
146
147            let prologue = quote_spanned! {op_span=>
148                let #joindata_ident = #df_ident.add_state(::std::cell::RefCell::new(
149                    #join_type::default()
150                ));
151            };
152            let borrow = quote_spanned! {op_span=>
153                unsafe {
154                    // SAFETY: handle from `#df_ident.add_state(..)`.
155                    #context.state_ref_unchecked(#joindata_ident)
156                }.borrow_mut()
157            };
158
159            Ok((prologue, reset, borrow, borrow_ident))
160        };
161
162        let persistences = match persistence_args[..] {
163            [] => {
164                let p = if loop_id.is_some() {
165                    Persistence::None
166                } else {
167                    Persistence::Tick
168                };
169                [p, p]
170            }
171            [a] => [a, a],
172            [a, b] => [a, b],
173            _ => panic!(),
174        };
175
176        let (lhs_prologue, lhs_prologue_after, lhs_borrow, lhs_borrow_ident) =
177            (make_joindata)(persistences[0], "lhs")?;
178        let (rhs_prologue, rhs_prologue_after, rhs_borrow, rhs_borrow_ident) =
179            (make_joindata)(persistences[1], "rhs")?;
180
181        let lhs = &inputs[0];
182        let rhs = &inputs[1];
183        let write_iterator = quote_spanned! {op_span=>
184            let mut #lhs_borrow_ident = #lhs_borrow;
185            let mut #rhs_borrow_ident = #rhs_borrow;
186            let #ident = {
187                // Limit error propagation by bounding locally, erasing output iterator type.
188                #[inline(always)]
189                fn check_inputs<'a, K, I1, V1, I2, V2>(
190                    lhs: I1,
191                    rhs: I2,
192                    lhs_state: &'a mut #join_type<K, V1, V2>,
193                    rhs_state: &'a mut #join_type<K, V2, V1>,
194                    is_new_tick: bool,
195                ) -> impl 'a + Iterator<Item = (K, (V1, V2))>
196                where
197                    K: Eq + std::hash::Hash + Clone,
198                    V1: Clone #additional_trait_bounds,
199                    V2: Clone #additional_trait_bounds,
200                    I1: 'a + Iterator<Item = (K, V1)>,
201                    I2: 'a + Iterator<Item = (K, V2)>,
202                {
203                    #work_fn(|| #root::compiled::pull::symmetric_hash_join_into_iter(lhs, rhs, lhs_state, rhs_state, is_new_tick))
204                }
205
206                check_inputs(#lhs, #rhs, &mut *#lhs_borrow_ident, &mut *#rhs_borrow_ident, #context.is_first_run_this_tick())
207            };
208        };
209
210        let write_iterator_after =
211            if persistences[0] == Persistence::Static || persistences[1] == Persistence::Static {
212                quote_spanned! {op_span=>
213                    // TODO: Probably only need to schedule if #*_borrow.len() > 0?
214                    #context.schedule_subgraph(#context.current_subgraph(), false);
215                }
216            } else {
217                quote_spanned! {op_span=>}
218            };
219
220        Ok(OperatorWriteOutput {
221            write_prologue: quote_spanned! {op_span=>
222                #lhs_prologue
223                #rhs_prologue
224            },
225            write_prologue_after: quote_spanned! {op_span=>
226                #lhs_prologue_after
227                #rhs_prologue_after
228            },
229            write_iterator,
230            write_iterator_after,
231        })
232    },
233};