1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
//! LR(1) state construction algorithm.

use crate::collections::{map, Multimap};
use crate::grammar::repr::*;
use crate::kernel_set;
use crate::lr1::core::*;
use crate::lr1::first;
use crate::lr1::lane_table::*;
use crate::lr1::lookahead::*;
use crate::tls::Tls;
use std::env;

#[cfg(test)]
mod test;

fn build_lr1_states_legacy<'grammar>(
    grammar: &'grammar Grammar,
    start: NonterminalString,
) -> Lr1Result<'grammar> {
    let eof = TokenSet::eof();
    let mut lr1: Lr<'grammar, TokenSet> = Lr::new(grammar, start, eof);
    lr1.set_permit_early_stop(true);
    lr1.build_states()
}

type ConstructionFunction<'grammar> =
    fn(&'grammar Grammar, NonterminalString) -> Lr1Result<'grammar>;

pub fn use_lane_table() -> bool {
    match env::var("LALRPOP_LANE_TABLE") {
        Ok(ref s) => s != "disabled",
        _ => true,
    }
}

pub fn build_lr1_states(grammar: &Grammar, start: NonterminalString) -> Lr1Result<'_> {
    let (method_name, method_fn) = if use_lane_table() {
        ("lane", build_lane_table_states as ConstructionFunction)
    } else {
        ("legacy", build_lr1_states_legacy as ConstructionFunction)
    };

    profile! {
        &Tls::session(),
        format!("LR(1) state construction ({})", method_name),
        {
            method_fn(grammar, start)
        }
    }
}

pub fn build_lr0_states(
    grammar: &Grammar,
    start: NonterminalString,
) -> Result<Vec<Lr0State<'_>>, Lr0TableConstructionError<'_>> {
    let lr1 = Lr::new(grammar, start, Nil);
    lr1.build_states()
}

pub struct Lr<'grammar, L: LookaheadBuild> {
    grammar: &'grammar Grammar,
    first_sets: first::FirstSets,
    start_nt: NonterminalString,
    start_lookahead: L,
    permit_early_stop: bool,
}

impl<'grammar, L: LookaheadBuild> Lr<'grammar, L> {
    fn new(grammar: &'grammar Grammar, start_nt: NonterminalString, start_lookahead: L) -> Self {
        Lr {
            grammar,
            first_sets: first::FirstSets::new(grammar),
            start_nt,
            start_lookahead,
            permit_early_stop: false,
        }
    }

    fn set_permit_early_stop(&mut self, v: bool) {
        self.permit_early_stop = v;
    }

    fn build_states(&self) -> Result<Vec<State<'grammar, L>>, TableConstructionError<'grammar, L>> {
        let session = Tls::session();
        let mut kernel_set = kernel_set::KernelSet::new();
        let mut states = vec![];
        let mut conflicts = vec![];

        // create the starting state
        kernel_set.add_state(Kernel::start(self.items(
            &self.start_nt,
            0,
            &self.start_lookahead,
        )));

        while let Some(Kernel { items: seed_items }) = kernel_set.next() {
            let items = self.transitive_closure(seed_items);
            let index = StateIndex(states.len());

            if index.0 % 5000 == 0 && index.0 > 0 {
                log!(session, Verbose, "{} states created so far.", index.0);
            }

            let mut this_state = State {
                index,
                items: items.clone(),
                shifts: map(),
                reductions: vec![],
                gotos: map(),
            };

            // group the items that we can transition into by shifting
            // over a term or nonterm
            let transitions: Multimap<Symbol, Multimap<Lr0Item<'grammar>, L>> = items
                .vec
                .iter()
                .filter_map(Item::shifted_item)
                .map(
                    |(
                        symbol,
                        Item {
                            production,
                            index,
                            lookahead,
                        },
                    )| {
                        (symbol.clone(), (Item::lr0(production, index), lookahead))
                    },
                )
                .collect();

            for (symbol, shifted_items) in transitions.into_iter() {
                let shifted_items: Vec<Item<'grammar, L>> = shifted_items
                    .into_iter()
                    .map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
                    .collect();

                // Not entirely obvious: if the original set of items
                // is sorted to begin with (and it is), then this new
                // set of shifted items is *also* sorted. This is
                // because it is produced from the old items by simply
                // incrementing the index by 1.
                let next_state = kernel_set.add_state(Kernel::shifted(shifted_items));

                match symbol {
                    Symbol::Terminal(s) => {
                        let prev = this_state.shifts.insert(s, next_state);
                        assert!(prev.is_none()); // cannot have a shift/shift conflict
                    }

                    Symbol::Nonterminal(s) => {
                        let prev = this_state.gotos.insert(s, next_state);
                        assert!(prev.is_none());
                    }
                }
            }

            // finally, consider the reductions
            for item in items.vec.iter().filter(|i| i.can_reduce()) {
                this_state
                    .reductions
                    .push((item.lookahead.clone(), item.production));
            }

            // check for shift-reduce conflicts (reduce-reduce detected above)
            conflicts.extend(L::conflicts(&this_state));

            // extract a new state
            states.push(this_state);

            if self.permit_early_stop && session.stop_after(conflicts.len()) {
                log!(
                    session,
                    Verbose,
                    "{} conflicts encountered, stopping.",
                    conflicts.len()
                );
                break;
            }
        }

        if !conflicts.is_empty() {
            Err(TableConstructionError { states, conflicts })
        } else {
            Ok(states)
        }
    }

    fn items(&self, id: &NonterminalString, index: usize, lookahead: &L) -> Vec<Item<'grammar, L>> {
        self.grammar
            .productions_for(id)
            .iter()
            .map(|production| {
                debug_assert!(index <= production.symbols.len());
                Item {
                    production,
                    index,
                    lookahead: lookahead.clone(),
                }
            })
            .collect()
    }

    // expands `state` with epsilon moves
    fn transitive_closure(&self, items: Vec<Item<'grammar, L>>) -> Items<'grammar, L> {
        let mut stack: Vec<Lr0Item<'grammar>> = items.iter().map(Item::to_lr0).collect();
        let mut map: Multimap<Lr0Item<'grammar>, L> = items
            .into_iter()
            .map(|item| (item.to_lr0(), item.lookahead))
            .collect();

        while let Some(item) = stack.pop() {
            let lookahead = map.get(&item).unwrap().clone();

            let shift_symbol = item.shift_symbol();

            // Check whether this is an item where the cursor
            // is resting on a non-terminal:
            //
            // I = ... (*) X z... [lookahead]
            //
            // The `nt` will be X and the `remainder` will be `z...`.
            let (nt, remainder) = match shift_symbol {
                None => continue, // requires a reduce
                Some((Symbol::Terminal(_), _)) => {
                    continue; // requires a shift
                }
                Some((Symbol::Nonterminal(nt), remainder)) => (nt, remainder),
            };

            // In that case, for each production of `X`, we are also
            // in a state where the cursor rests at the start of that production:
            //
            // X = (*) a... [lookahead']
            // X = (*) b... [lookahead']
            //
            // Here `lookahead'` is computed based on the `remainder` and our
            // `lookahead`. In LR1 at least, it is the union of:
            //
            //   (a) FIRST(remainder)
            //   (b) if remainder may match epsilon, also our lookahead.
            for new_item in L::epsilon_moves(self, nt, remainder, &lookahead) {
                let new_item0 = new_item.to_lr0();
                if map.push(new_item0, new_item.lookahead) {
                    stack.push(new_item0);
                }
            }
        }

        let final_items = map
            .into_iter()
            .map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
            .collect();

        Items { vec: final_items }
    }
}

/// Except for the initial state, the kernel sets always contain
/// a set of "seed" items where something has been pushed (that is,
/// index > 0). In other words, items like this:
///
///    A = ...p (*) ...
///
/// where ...p is non-empty. We now have to expand to include any
/// epsilon moves:
///
///    A = ... (*) B ...
///    B = (*) ...        // added by transitive_closure algorithm
///
/// But note that the state is completely identified by its
/// kernel set: the same kernel sets always expand to the
/// same transitive closures, and different kernel sets
/// always expand to different transitive closures. The
/// first point is obvious, but the latter point follows
/// because the transitive closure algorithm only adds
/// items where `index == 0`, and hence it can never add an
/// item found in a kernel set.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Kernel<'grammar, L: LookaheadBuild> {
    items: Vec<Item<'grammar, L>>,
}

impl<'grammar, L: LookaheadBuild> Kernel<'grammar, L> {
    pub fn start(items: Vec<Item<'grammar, L>>) -> Kernel<'grammar, L> {
        // In start state, kernel should have only items with `index == 0`.
        debug_assert!(items.iter().all(|item| item.index == 0));
        Kernel { items }
    }

    pub fn shifted(items: Vec<Item<'grammar, L>>) -> Kernel<'grammar, L> {
        // Assert that this kernel consists only of shifted items
        // where `index > 0`. This assertion could cost real time to
        // check so only do it in debug mode.
        debug_assert!(items.iter().all(|item| item.index > 0));
        Kernel { items }
    }
}

impl<'grammar, L: LookaheadBuild> kernel_set::Kernel for Kernel<'grammar, L> {
    type Index = StateIndex;

    fn index(c: usize) -> StateIndex {
        StateIndex(c)
    }
}

pub trait LookaheadBuild: Lookahead {
    // Given that there exists an item
    //
    //     X = ... (*) Y ...s [L]
    //
    // where `nt` is `Y`, `remainder` is `...s`, and `lookahead` is
    // `L`, computes the new items resulting from epsilon moves (if
    // any). The technique of doing this will depend on the amount of
    // lookahead.
    //
    // For example, if we have an LR0 item, then for each `Y = ...`
    // production, we just add an `Y = (*) ...` item. But for LR1
    // items, we have to add multiple items where we consider the
    // lookahead from `FIRST(...s, L)`.
    fn epsilon_moves<'grammar>(
        lr: &Lr<'grammar, Self>,
        nt: &NonterminalString,
        remainder: &[Symbol],
        lookahead: &Self,
    ) -> Vec<Item<'grammar, Self>>;
}

impl LookaheadBuild for Nil {
    fn epsilon_moves<'grammar>(
        lr: &Lr<'grammar, Self>,
        nt: &NonterminalString,
        _remainder: &[Symbol],
        lookahead: &Nil,
    ) -> Vec<Lr0Item<'grammar>> {
        lr.items(nt, 0, lookahead)
    }
}

impl LookaheadBuild for TokenSet {
    fn epsilon_moves<'grammar>(
        lr: &Lr<'grammar, Self>,
        nt: &NonterminalString,
        remainder: &[Symbol],
        lookahead: &Self,
    ) -> Vec<Lr1Item<'grammar>> {
        let first_set = lr.first_sets.first1(remainder, lookahead);
        lr.items(nt, 0, &first_set)
    }
}