1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
//! First set construction and computation.
use crate::collections::{map, Map};
use crate::grammar::repr::*;
use crate::lr1::lookahead::{Token, TokenSet};
use super::Lr1Tls;
#[cfg(test)]
mod test;
#[derive(Clone)]
pub struct FirstSets {
map: Map<NonterminalString, TokenSet>,
}
impl FirstSets {
pub fn new(grammar: &Grammar) -> FirstSets {
let mut this = FirstSets { map: map() };
let mut changed = true;
while changed {
changed = false;
for production in grammar.nonterminals.values().flat_map(|p| &p.productions) {
let nt = &production.nonterminal;
let lookahead = this.first0(&production.symbols);
let first_set = this.map.entry(nt.clone()).or_insert_with(TokenSet::new);
changed |= first_set.union_with(&lookahead);
}
}
this
}
/// Returns `FIRST(...symbols)`. If `...symbols` may derive
/// epsilon, then this returned set will include EOF. (This is
/// kind of repurposing EOF to serve as a binary flag of sorts.)
pub fn first0<'s, I>(&self, symbols: I) -> TokenSet
where
I: IntoIterator<Item = &'s Symbol>,
{
let mut result = TokenSet::new();
for symbol in symbols {
match symbol {
Symbol::Terminal(t) => {
result.insert(Token::Terminal(t.clone()));
return result;
}
Symbol::Nonterminal(nt) => {
let mut empty_prod = false;
match self.map.get(nt) {
None => {
// This should only happen during set
// construction; it corresponds to an
// entry that has not yet been
// built. Otherwise, it would mean a
// terminal with no productions. Either
// way, the resulting first set should be
// empty.
}
Some(set) => {
result.reserve(set.len());
Lr1Tls::with(|terminals| {
for lookahead in set.iter() {
match lookahead {
Token::Eof => {
empty_prod = true;
}
Token::Error | Token::Terminal(_) => {
result.insert_with(lookahead, terminals);
}
}
}
})
}
}
if !empty_prod {
return result;
}
}
}
}
// control only reaches here if either symbols is empty, or it
// consists of nonterminals all of which may derive epsilon
result.insert(Token::Eof);
result
}
pub fn first1<'s, I>(&self, symbols: I, lookahead: &TokenSet) -> TokenSet
where
I: IntoIterator<Item = &'s Symbol>,
{
let mut set = self.first0(symbols);
// we use EOF as the signal that `symbols` derives epsilon:
let epsilon = set.take_eof();
if epsilon {
set.union_with(lookahead);
}
set
}
}