1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
//! When we are combining two NFAs, we will grab all the outgoing
//! edges from a set of nodes and wind up with a bunch of potentially
//! overlapping character ranges like:
//!
//!     a-z
//!     c-l
//!     0-9
//!
//! This module contains code to turn those into non-overlapping ranges like:
//!
//!     a-b
//!     c-l
//!     m-z
//!     0-9
//!
//! Specifically, we want to ensure that the same set of characters is
//! covered when we started, and that each of the input ranges is
//! covered precisely by some set of ranges in the output.

use crate::collections::Set;
use crate::lexer::nfa::Test;
use std::cmp;
use std::ops::RangeInclusive;

pub fn remove_overlap(ranges: &Set<Test>) -> Vec<Test> {
    // We will do this in the dumbest possible way to start. :)
    // Maintain a result vector that contains disjoint ranges.  To
    // insert a new range, we walk over this vector and split things
    // up as we go. This algorithm is so naive as to be exponential, I
    // think. Sue me.

    let mut disjoint_ranges = vec![];

    for range in ranges {
        add_range(range.to_owned(), 0, &mut disjoint_ranges);
    }

    // the algorithm above leaves some empty ranges in for simplicity;
    // prune them out.
    disjoint_ranges.retain(|r| !r.is_empty());

    disjoint_ranges.sort();

    disjoint_ranges
}

fn add_range(range: Test, start_index: usize, disjoint_ranges: &mut Vec<Test>) {
    if range.is_empty() {
        return;
    }

    // Find first overlapping range in `disjoint_ranges`, if any.
    match disjoint_ranges[start_index..]
        .iter()
        .position(|r| r.intersects(&range))
    {
        Some(index) => {
            let index = index + start_index;
            let overlapping_range = &disjoint_ranges[index];

            // If the range we are trying to add already exists, we're all done.
            if overlapping_range == &range {
                return;
            }

            // Otherwise, we want to create three ranges (some of which may
            // be empty). e.g. imagine one range is `a-z` and the other
            // is `c-l`, we want `a-b`, `c-l`, and `m-z`.
            let min_min = cmp::min(range.start(), overlapping_range.start());
            let mid_min = cmp::max(range.start(), overlapping_range.start());
            let mid_max = cmp::min(range.end(), overlapping_range.end());
            let max_max = cmp::max(range.end(), overlapping_range.end());
            // When working with inclusive ranges, we need to be sure to not double count
            // the meeting points of low-mid_range and mid-max_range.
            // So we adjust the end of the low_range and start of max_range as these elements are already included in the start of their corresponding next ranges.

            let low_range = if mid_min == 0 {
                // This is an edgecase where both ranges start at the null character
                // In this case we don't want to create a range from 0 to -1
                // Thus we create an empty range
                Test::new(RangeInclusive::new(1, 0))
            } else {
                Test::new(min_min..=mid_min - 1)
            };
            let mid_range = Test::new(mid_min..=mid_max);
            let max_range = Test::new(mid_max + 1..=max_max);

            assert!(low_range.is_disjoint(&mid_range));
            assert!(low_range.is_disjoint(&max_range));
            assert!(mid_range.is_disjoint(&max_range));

            // Replace the existing range with the low range, and then
            // add the mid and max ranges in. (The low range may be
            // empty, but we'll prune that out later.)
            disjoint_ranges[index] = low_range;
            add_range(mid_range, index + 1, disjoint_ranges);
            add_range(max_range, index + 1, disjoint_ranges);
        }

        None => {
            // no overlap -- easy case.
            disjoint_ranges.push(range);
        }
    }
}

#[cfg(test)]
macro_rules! test {
    ($($range:expr,)*) => {
        {
            use crate::collections::set;
            use crate::lexer::nfa::Test;
            use std::char;
            let mut s = set();
            $({ let r = $range; s.insert(Test::inclusive_range(*r.start(), *r.end())); })*
            remove_overlap(&s).into_iter()
                              .map(|r|
                                   char::from_u32(r.start()).unwrap() ..=
                                   char::from_u32(r.end()).unwrap())
                              .collect::<Vec<_>>()
        }
    }
}

#[test]
fn alphabet() {
    let result = test! {
        'a' ..= 'z',
        'c' ..= 'l',
        '0' ..= '9',
    };
    assert_eq!(result, vec!['0'..='9', 'a'..='b', 'c'..='l', 'm'..='z']);
}

#[test]
fn repeat() {
    let result = test! {
        'a' ..= 'z',
        'c' ..= 'l',
        'l' ..= 'z',
        '0' ..= '9',
    };
    assert_eq!(
        result,
        vec!['0'..='9', 'a'..='b', 'c'..='k', 'l'..='l', 'm'..='z']
    );
}

#[test]
fn stagger() {
    let result = test! {
        '0' ..= '3',
        '2' ..= '4',
        '3' ..= '5',
    };
    assert_eq!(
        result,
        vec!['0'..='1', '2'..='2', '3'..='3', '4'..='4', '5'..='5']
    );
}

#[test]
fn empty_range() {
    let result = test! {
        'b' ..= 'b',
        'a' ..= 'z',
    };
    assert_eq!(result, vec!['a'..='a', 'b'..='b', 'c'..='z']);
}

#[test]
fn null() {
    let result = test! {
        '\0' ..= '\0',
        '\0' ..= 'a',
    };
    assert_eq!(result, vec!['\0'..='\0', 1 as char..='a']);
}