1# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import print_function
16
17import heapq
18import itertools
19
20
21__all__ = ["RangeSet"]
22
23
24class RangeSet(object):
25  """A RangeSet represents a set of non-overlapping ranges on integers.
26
27  Attributes:
28    monotonic: Whether the input has all its integers in increasing order.
29    extra: A dict that can be used by the caller, e.g. to store info that's
30        only meaningful to caller.
31  """
32
33  def __init__(self, data=None):
34    self.monotonic = False
35    self._extra = {}
36    if isinstance(data, str):
37      self._parse_internal(data)
38    elif data:
39      assert len(data) % 2 == 0
40      self.data = tuple(self._remove_pairs(data))
41      self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
42    else:
43      self.data = ()
44
45  def __iter__(self):
46    for i in range(0, len(self.data), 2):
47      yield self.data[i:i+2]
48
49  def __eq__(self, other):
50    return self.data == other.data
51
52  def __ne__(self, other):
53    return self.data != other.data
54
55  def __bool__(self):
56    return bool(self.data)
57
58  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
59  __nonzero__ = __bool__
60
61  def __str__(self):
62    if not self.data:
63      return "empty"
64    else:
65      return self.to_string()
66
67  def __repr__(self):
68    return '<RangeSet("' + self.to_string() + '")>'
69
70  @property
71  def extra(self):
72    return self._extra
73
74  @classmethod
75  def parse(cls, text):
76    """Parses a text string into a RangeSet.
77
78    The input text string consists of a space-separated list of blocks and
79    ranges, e.g. "10-20 30 35-40". Ranges are interpreted to include both their
80    ends (so the above example represents 18 individual blocks). Returns a
81    RangeSet object.
82
83    If the input has all its blocks in increasing order, then the 'monotonic'
84    attribute of the returned RangeSet will be set to True. For example the
85    input "10-20 30" is monotonic, but the input "15-20 30 10-14" is not, even
86    though they represent the same set of blocks (and the two RangeSets will
87    compare equal with ==).
88    """
89    return cls(text)
90
91  @classmethod
92  def parse_raw(cls, text):
93    """Parse a string generated by RangeSet.to_string_raw().
94
95    >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
96    <RangeSet("0-9")>
97    """
98
99    raw = [int(i) for i in text.split(',')]
100    assert raw[0] == len(raw[1:]), "Invalid raw string."
101
102    return cls(data=raw[1:])
103
104  def _parse_internal(self, text):
105    data = []
106    last = -1
107    monotonic = True
108    for p in text.split():
109      if "-" in p:
110        s, e = (int(x) for x in p.split("-"))
111        data.append(s)
112        data.append(e+1)
113        if last <= s <= e:
114          last = e
115        else:
116          monotonic = False
117      else:
118        s = int(p)
119        data.append(s)
120        data.append(s+1)
121        if last <= s:
122          last = s+1
123        else:
124          monotonic = False
125    data.sort()
126    self.data = tuple(self._remove_pairs(data))
127    self.monotonic = monotonic
128
129  @staticmethod
130  def _remove_pairs(source):
131    """Remove consecutive duplicate items to simplify the result.
132
133    [1, 2, 2, 5, 5, 10] will become [1, 10]."""
134    last = None
135    for i in source:
136      if i == last:
137        last = None
138      else:
139        if last is not None:
140          yield last
141        last = i
142    if last is not None:
143      yield last
144
145  def to_string(self):
146    out = []
147    for i in range(0, len(self.data), 2):
148      s, e = self.data[i:i+2]
149      if e == s+1:
150        out.append(str(s))
151      else:
152        out.append(str(s) + "-" + str(e-1))
153    return " ".join(out)
154
155  def to_string_raw(self):
156    assert self.data
157    return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
158
159  def union(self, other):
160    """Return a new RangeSet representing the union of this RangeSet
161    with the argument.
162
163    >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
164    <RangeSet("10-34")>
165    >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
166    <RangeSet("10-19 22 30-34")>
167    """
168    out = []
169    z = 0
170    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
171                            zip(other.data, itertools.cycle((+1, -1)))):
172      if (z == 0 and d == 1) or (z == 1 and d == -1):
173        out.append(p)
174      z += d
175    return RangeSet(data=out)
176
177  def intersect(self, other):
178    """Return a new RangeSet representing the intersection of this
179    RangeSet with the argument.
180
181    >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
182    <RangeSet("18-19 30-32")>
183    >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
184    <RangeSet("")>
185    """
186    out = []
187    z = 0
188    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
189                            zip(other.data, itertools.cycle((+1, -1)))):
190      if (z == 1 and d == 1) or (z == 2 and d == -1):
191        out.append(p)
192      z += d
193    return RangeSet(data=out)
194
195  def subtract(self, other):
196    """Return a new RangeSet representing subtracting the argument
197    from this RangeSet.
198
199    >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
200    <RangeSet("10-17 33-34")>
201    >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
202    <RangeSet("10-19 30-34")>
203    """
204
205    out = []
206    z = 0
207    for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
208                            zip(other.data, itertools.cycle((-1, +1)))):
209      if (z == 0 and d == 1) or (z == 1 and d == -1):
210        out.append(p)
211      z += d
212    return RangeSet(data=out)
213
214  def overlaps(self, other):
215    """Returns true if the argument has a nonempty overlap with this
216    RangeSet.
217
218    >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
219    True
220    >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
221    False
222    """
223
224    # This is like intersect, but we can stop as soon as we discover the
225    # output is going to be nonempty.
226    z = 0
227    for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
228                            zip(other.data, itertools.cycle((+1, -1)))):
229      if (z == 1 and d == 1) or (z == 2 and d == -1):
230        return True
231      z += d
232    return False
233
234  def size(self):
235    """Returns the total size of the RangeSet (ie, how many integers
236    are in the set).
237
238    >>> RangeSet("10-19 30-34").size()
239    15
240    """
241
242    total = 0
243    for i, p in enumerate(self.data):
244      if i % 2:
245        total += p
246      else:
247        total -= p
248    return total
249
250  def map_within(self, other):
251    """'other' should be a subset of 'self'.  Returns a RangeSet
252    representing what 'other' would get translated to if the integers
253    of 'self' were translated down to be contiguous starting at zero.
254
255    >>> RangeSet("0-9").map_within(RangeSet("3-4"))
256    <RangeSet("3-4")>
257    >>> RangeSet("10-19").map_within(RangeSet("13-14"))
258    <RangeSet("3-4")>
259    >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
260    <RangeSet("7-12")>
261    >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
262    <RangeSet("2-3 7-12")>
263    """
264
265    out = []
266    offset = 0
267    start = None
268    for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
269                            zip(other.data, itertools.cycle((-1, +1)))):
270      if d == -5:
271        start = p
272      elif d == +5:
273        offset += p-start
274        start = None
275      else:
276        out.append(offset + p - start)
277    return RangeSet(data=out)
278
279  def extend(self, n):
280    """Extend the RangeSet by 'n' blocks.
281
282    The lower bound is guaranteed to be non-negative.
283
284    >>> RangeSet("0-9").extend(1)
285    <RangeSet("0-10")>
286    >>> RangeSet("10-19").extend(15)
287    <RangeSet("0-34")>
288    >>> RangeSet("10-19 30-39").extend(4)
289    <RangeSet("6-23 26-43")>
290    >>> RangeSet("10-19 30-39").extend(10)
291    <RangeSet("0-49")>
292    """
293    out = self
294    for i in range(0, len(self.data), 2):
295      s, e = self.data[i:i+2]
296      s1 = max(0, s - n)
297      e1 = e + n
298      out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
299    return out
300
301  def first(self, n):
302    """Return the RangeSet that contains at most the first 'n' integers.
303
304    >>> RangeSet("0-9").first(1)
305    <RangeSet("0")>
306    >>> RangeSet("10-19").first(5)
307    <RangeSet("10-14")>
308    >>> RangeSet("10-19").first(15)
309    <RangeSet("10-19")>
310    >>> RangeSet("10-19 30-39").first(3)
311    <RangeSet("10-12")>
312    >>> RangeSet("10-19 30-39").first(15)
313    <RangeSet("10-19 30-34")>
314    >>> RangeSet("10-19 30-39").first(30)
315    <RangeSet("10-19 30-39")>
316    >>> RangeSet("0-9").first(0)
317    <RangeSet("")>
318    """
319
320    if self.size() <= n:
321      return self
322
323    out = []
324    for s, e in self:
325      if e - s >= n:
326        out += (s, s+n)
327        break
328      else:
329        out += (s, e)
330        n -= e - s
331    return RangeSet(data=out)
332
333  def next_item(self):
334    """Return the next integer represented by the RangeSet.
335
336    >>> list(RangeSet("0-9").next_item())
337    [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
338    >>> list(RangeSet("10-19 3-5").next_item())
339    [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
340    >>> list(RangeSet("10-19 3 5 7").next_item())
341    [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
342    """
343    for s, e in self:
344      for element in range(s, e):
345        yield element
346
347
348if __name__ == "__main__":
349  import doctest
350  doctest.testmod()
351