1# Copyright (C) 2014 The Android Open Source Project 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15from __future__ import print_function 16 17import heapq 18import itertools 19 20 21__all__ = ["RangeSet"] 22 23 24class RangeSet(object): 25 """A RangeSet represents a set of non-overlapping ranges on integers. 26 27 Attributes: 28 monotonic: Whether the input has all its integers in increasing order. 29 extra: A dict that can be used by the caller, e.g. to store info that's 30 only meaningful to caller. 31 """ 32 33 def __init__(self, data=None): 34 self.monotonic = False 35 self._extra = {} 36 if isinstance(data, str): 37 self._parse_internal(data) 38 elif data: 39 assert len(data) % 2 == 0 40 self.data = tuple(self._remove_pairs(data)) 41 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:])) 42 else: 43 self.data = () 44 45 def __iter__(self): 46 for i in range(0, len(self.data), 2): 47 yield self.data[i:i+2] 48 49 def __eq__(self, other): 50 return self.data == other.data 51 52 def __ne__(self, other): 53 return self.data != other.data 54 55 def __bool__(self): 56 return bool(self.data) 57 58 # Python 2 uses __nonzero__, while Python 3 uses __bool__. 59 __nonzero__ = __bool__ 60 61 def __str__(self): 62 if not self.data: 63 return "empty" 64 else: 65 return self.to_string() 66 67 def __repr__(self): 68 return '<RangeSet("' + self.to_string() + '")>' 69 70 @property 71 def extra(self): 72 return self._extra 73 74 @classmethod 75 def parse(cls, text): 76 """Parses a text string into a RangeSet. 77 78 The input text string consists of a space-separated list of blocks and 79 ranges, e.g. "10-20 30 35-40". Ranges are interpreted to include both their 80 ends (so the above example represents 18 individual blocks). Returns a 81 RangeSet object. 82 83 If the input has all its blocks in increasing order, then the 'monotonic' 84 attribute of the returned RangeSet will be set to True. For example the 85 input "10-20 30" is monotonic, but the input "15-20 30 10-14" is not, even 86 though they represent the same set of blocks (and the two RangeSets will 87 compare equal with ==). 88 """ 89 return cls(text) 90 91 @classmethod 92 def parse_raw(cls, text): 93 """Parse a string generated by RangeSet.to_string_raw(). 94 95 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw()) 96 <RangeSet("0-9")> 97 """ 98 99 raw = [int(i) for i in text.split(',')] 100 assert raw[0] == len(raw[1:]), "Invalid raw string." 101 102 return cls(data=raw[1:]) 103 104 def _parse_internal(self, text): 105 data = [] 106 last = -1 107 monotonic = True 108 for p in text.split(): 109 if "-" in p: 110 s, e = (int(x) for x in p.split("-")) 111 data.append(s) 112 data.append(e+1) 113 if last <= s <= e: 114 last = e 115 else: 116 monotonic = False 117 else: 118 s = int(p) 119 data.append(s) 120 data.append(s+1) 121 if last <= s: 122 last = s+1 123 else: 124 monotonic = False 125 data.sort() 126 self.data = tuple(self._remove_pairs(data)) 127 self.monotonic = monotonic 128 129 @staticmethod 130 def _remove_pairs(source): 131 """Remove consecutive duplicate items to simplify the result. 132 133 [1, 2, 2, 5, 5, 10] will become [1, 10].""" 134 last = None 135 for i in source: 136 if i == last: 137 last = None 138 else: 139 if last is not None: 140 yield last 141 last = i 142 if last is not None: 143 yield last 144 145 def to_string(self): 146 out = [] 147 for i in range(0, len(self.data), 2): 148 s, e = self.data[i:i+2] 149 if e == s+1: 150 out.append(str(s)) 151 else: 152 out.append(str(s) + "-" + str(e-1)) 153 return " ".join(out) 154 155 def to_string_raw(self): 156 assert self.data 157 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data) 158 159 def union(self, other): 160 """Return a new RangeSet representing the union of this RangeSet 161 with the argument. 162 163 >>> RangeSet("10-19 30-34").union(RangeSet("18-29")) 164 <RangeSet("10-34")> 165 >>> RangeSet("10-19 30-34").union(RangeSet("22 32")) 166 <RangeSet("10-19 22 30-34")> 167 """ 168 out = [] 169 z = 0 170 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 171 zip(other.data, itertools.cycle((+1, -1)))): 172 if (z == 0 and d == 1) or (z == 1 and d == -1): 173 out.append(p) 174 z += d 175 return RangeSet(data=out) 176 177 def intersect(self, other): 178 """Return a new RangeSet representing the intersection of this 179 RangeSet with the argument. 180 181 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32")) 182 <RangeSet("18-19 30-32")> 183 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28")) 184 <RangeSet("")> 185 """ 186 out = [] 187 z = 0 188 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 189 zip(other.data, itertools.cycle((+1, -1)))): 190 if (z == 1 and d == 1) or (z == 2 and d == -1): 191 out.append(p) 192 z += d 193 return RangeSet(data=out) 194 195 def subtract(self, other): 196 """Return a new RangeSet representing subtracting the argument 197 from this RangeSet. 198 199 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32")) 200 <RangeSet("10-17 33-34")> 201 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28")) 202 <RangeSet("10-19 30-34")> 203 """ 204 205 out = [] 206 z = 0 207 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 208 zip(other.data, itertools.cycle((-1, +1)))): 209 if (z == 0 and d == 1) or (z == 1 and d == -1): 210 out.append(p) 211 z += d 212 return RangeSet(data=out) 213 214 def overlaps(self, other): 215 """Returns true if the argument has a nonempty overlap with this 216 RangeSet. 217 218 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32")) 219 True 220 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28")) 221 False 222 """ 223 224 # This is like intersect, but we can stop as soon as we discover the 225 # output is going to be nonempty. 226 z = 0 227 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))), 228 zip(other.data, itertools.cycle((+1, -1)))): 229 if (z == 1 and d == 1) or (z == 2 and d == -1): 230 return True 231 z += d 232 return False 233 234 def size(self): 235 """Returns the total size of the RangeSet (ie, how many integers 236 are in the set). 237 238 >>> RangeSet("10-19 30-34").size() 239 15 240 """ 241 242 total = 0 243 for i, p in enumerate(self.data): 244 if i % 2: 245 total += p 246 else: 247 total -= p 248 return total 249 250 def map_within(self, other): 251 """'other' should be a subset of 'self'. Returns a RangeSet 252 representing what 'other' would get translated to if the integers 253 of 'self' were translated down to be contiguous starting at zero. 254 255 >>> RangeSet("0-9").map_within(RangeSet("3-4")) 256 <RangeSet("3-4")> 257 >>> RangeSet("10-19").map_within(RangeSet("13-14")) 258 <RangeSet("3-4")> 259 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32")) 260 <RangeSet("7-12")> 261 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32")) 262 <RangeSet("2-3 7-12")> 263 """ 264 265 out = [] 266 offset = 0 267 start = None 268 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))), 269 zip(other.data, itertools.cycle((-1, +1)))): 270 if d == -5: 271 start = p 272 elif d == +5: 273 offset += p-start 274 start = None 275 else: 276 out.append(offset + p - start) 277 return RangeSet(data=out) 278 279 def extend(self, n): 280 """Extend the RangeSet by 'n' blocks. 281 282 The lower bound is guaranteed to be non-negative. 283 284 >>> RangeSet("0-9").extend(1) 285 <RangeSet("0-10")> 286 >>> RangeSet("10-19").extend(15) 287 <RangeSet("0-34")> 288 >>> RangeSet("10-19 30-39").extend(4) 289 <RangeSet("6-23 26-43")> 290 >>> RangeSet("10-19 30-39").extend(10) 291 <RangeSet("0-49")> 292 """ 293 out = self 294 for i in range(0, len(self.data), 2): 295 s, e = self.data[i:i+2] 296 s1 = max(0, s - n) 297 e1 = e + n 298 out = out.union(RangeSet(str(s1) + "-" + str(e1-1))) 299 return out 300 301 def first(self, n): 302 """Return the RangeSet that contains at most the first 'n' integers. 303 304 >>> RangeSet("0-9").first(1) 305 <RangeSet("0")> 306 >>> RangeSet("10-19").first(5) 307 <RangeSet("10-14")> 308 >>> RangeSet("10-19").first(15) 309 <RangeSet("10-19")> 310 >>> RangeSet("10-19 30-39").first(3) 311 <RangeSet("10-12")> 312 >>> RangeSet("10-19 30-39").first(15) 313 <RangeSet("10-19 30-34")> 314 >>> RangeSet("10-19 30-39").first(30) 315 <RangeSet("10-19 30-39")> 316 >>> RangeSet("0-9").first(0) 317 <RangeSet("")> 318 """ 319 320 if self.size() <= n: 321 return self 322 323 out = [] 324 for s, e in self: 325 if e - s >= n: 326 out += (s, s+n) 327 break 328 else: 329 out += (s, e) 330 n -= e - s 331 return RangeSet(data=out) 332 333 def next_item(self): 334 """Return the next integer represented by the RangeSet. 335 336 >>> list(RangeSet("0-9").next_item()) 337 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 338 >>> list(RangeSet("10-19 3-5").next_item()) 339 [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 340 >>> list(RangeSet("10-19 3 5 7").next_item()) 341 [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 342 """ 343 for s, e in self: 344 for element in range(s, e): 345 yield element 346 347 348if __name__ == "__main__": 349 import doctest 350 doctest.testmod() 351