Update version number and release date.
[python/dscho.git] / Lib / sets.py
blobe6a509f1fa2d4574baf45121d1f8e83fdac6930b
1 """Classes to represent arbitrary sets (including sets of sets).
3 This module implements sets using dictionaries whose values are
4 ignored. The usual operations (union, intersection, deletion, etc.)
5 are provided as both methods and operators.
7 Important: sets are not sequences! While they support 'x in s',
8 'len(s)', and 'for x in s', none of those operations are unique for
9 sequences; for example, mappings support all three as well. The
10 characteristic operation for sequences is subscripting with small
11 integers: s[i], for i in range(len(s)). Sets don't support
12 subscripting at all. Also, sequences allow multiple occurrences and
13 their elements have a definite order; sets on the other hand don't
14 record multiple occurrences and don't remember the order of element
15 insertion (which is why they don't support s[i]).
17 The following classes are provided:
19 BaseSet -- All the operations common to both mutable and immutable
20 sets. This is an abstract class, not meant to be directly
21 instantiated.
23 Set -- Mutable sets, subclass of BaseSet; not hashable.
25 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
26 An iterable argument is mandatory to create an ImmutableSet.
28 _TemporarilyImmutableSet -- A wrapper around a Set (implemented as a
29 BaseSet subclass that shares the wrapped Set's data), hashable,
30 giving the same hash value as the immutable set equivalent would
31 have. Do not use this class directly.
33 Only hashable objects can be added to a Set. In particular, you cannot
34 really add a Set as an element to another Set; if you try, what is
35 actually added is an ImmutableSet built from it (it compares equal to
36 the one you tried adding).
38 When you ask if `x in y' where x is a Set and y is a Set or
39 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
40 what's tested is actually `z in y'.
42 """
44 # Code history:
46 # - Greg V. Wilson wrote the first version, using a different approach
47 # to the mutable/immutable problem, and inheriting from dict.
49 # - Alex Martelli modified Greg's version to implement the current
50 # Set/ImmutableSet approach, and make the data an attribute.
52 # - Guido van Rossum rewrote much of the code, made some API changes,
53 # and cleaned up the docstrings.
55 # - Raymond Hettinger added a number of speedups and other
56 # improvements.
# Names exported by "from sets import *".  The _TemporarilyImmutableSet
# helper is not part of the exported API.
__all__ = ['BaseSet', 'Set', 'ImmutableSet']
60 from itertools import ifilter, ifilterfalse
class BaseSet(object):
    """Common base class for mutable and immutable sets.

    Elements are stored as the keys of the dictionary self._data; the
    values of that dictionary are ignored.  This class never creates
    _data itself: a concrete subclass must assign it in its own
    __init__().
    """

    __slots__ = ['_data']

    # Constructor

    def __init__(self):
        """This is an abstract class."""
        # Don't call this from a concrete subclass!
        if self.__class__ is BaseSet:
            raise TypeError("BaseSet is an abstract class. "
                            "Use Set or ImmutableSet.")

    # Standard protocols: __len__, __repr__, __str__, __iter__

    def __len__(self):
        """Return the number of elements of a set."""
        return len(self._data)

    def __repr__(self):
        """Return string representation of a set.

        This looks like 'Set([<list of elements>])'.
        """
        return self._repr()

    # __str__ is the same as __repr__
    __str__ = __repr__

    def _repr(self, sorted=False):
        """Return '<class name>([<elements>])'.

        If sorted is true the elements are listed in sorted order;
        otherwise they appear in the underlying dict's order.
        """
        elements = list(self._data)
        if sorted:
            elements.sort()
        return '%s(%r)' % (self.__class__.__name__, elements)

    def __iter__(self):
        """Return an iterator over the elements of a set.

        This is the keys iterator for the underlying dict.
        """
        return iter(self._data)

    # Three-way comparison is not supported.  However, because __eq__ is
    # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
    # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
    # case).

    def __cmp__(self, other):
        """Always raise TypeError: sets have no three-way ordering."""
        raise TypeError("can't compare sets using cmp()")

    # Equality comparisons using the underlying dicts.  Mixed-type comparisons
    # are allowed here, where Set == z for non-Set z always returns False,
    # and Set != z always True.  This allows expressions like "x in y" to
    # give the expected result when y is a sequence of mixed types, not
    # raising a pointless TypeError just because y contains a Set, or x is
    # a Set and y contains a non-set ("in" invokes only __eq__).
    # Subtle:  it would be nicer if __eq__ and __ne__ could return
    # NotImplemented instead of True or False.  Then the other comparand
    # would get a chance to determine the result, and if the other comparand
    # also returned NotImplemented then it would fall back to object address
    # comparison (which would always return False for __eq__ and always
    # True for __ne__).  However, that doesn't work, because this type
    # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
    # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.

    def __eq__(self, other):
        """Return True if other is a set with the same elements."""
        if isinstance(other, BaseSet):
            return self._data == other._data
        else:
            return False

    def __ne__(self, other):
        """Return True unless other is a set with the same elements."""
        if isinstance(other, BaseSet):
            return self._data != other._data
        else:
            return True

    # Copying operations

    def copy(self):
        """Return a shallow copy of a set."""
        result = self.__class__()
        result._data.update(self._data)
        return result

    __copy__ = copy # For the copy module

    def __deepcopy__(self, memo):
        """Return a deep copy of a set; used by copy module."""
        # This pre-creates the result and inserts it in the memo
        # early, in case the deep copy recurses into another reference
        # to this same set.  A set can't be an element of itself, but
        # it can certainly contain an object that has a reference to
        # itself.
        from copy import deepcopy
        result = self.__class__()
        memo[id(self)] = result
        data = result._data
        value = True
        for elt in self:
            data[deepcopy(elt, memo)] = value
        return result

    # Standard set operations: union, intersection, both differences.
    # Each has an operator version (e.g. __or__, invoked with |) and a
    # method version (e.g. union).
    # Subtle:  Each pair requires distinct code so that the outcome is
    # correct when the type of other isn't suitable.  For example, if
    # we did "union = __or__" instead, then Set().union(3) would return
    # NotImplemented instead of raising TypeError (albeit that *why* it
    # raises TypeError as-is is also a bit subtle).

    def __or__(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__()
        result._data = self._data.copy()
        result._data.update(other._data)
        return result

    def union(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        return self | other

    def __and__(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        # Walk the smaller set and probe the larger one.
        if len(self) <= len(other):
            little, big = self, other
        else:
            little, big = other, self
        common = ifilter(big._data.__contains__, little)
        return self.__class__(common)

    def intersection(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        return self & other

    def __xor__(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__()
        data = result._data
        value = True
        selfdata = self._data
        otherdata = other._data
        for elt in ifilterfalse(otherdata.__contains__, selfdata):
            data[elt] = value
        for elt in ifilterfalse(selfdata.__contains__, otherdata):
            data[elt] = value
        return result

    def symmetric_difference(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        return self ^ other

    def __sub__(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        result = self.__class__()
        data = result._data
        value = True
        for elt in ifilterfalse(other._data.__contains__, self):
            data[elt] = value
        return result

    def difference(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        return self - other

    # Membership test

    def __contains__(self, element):
        """Report whether an element is a member of a set.

        (Called in response to the expression `element in self'.)
        """
        try:
            return element in self._data
        except TypeError:
            # The element could not be used as a dict key; if it offers a
            # temporarily immutable stand-in, look that up instead.
            transform = getattr(element, "__as_temporarily_immutable__", None)
            if transform is None:
                raise # re-raise the TypeError exception we caught
            return transform() in self._data

    # Subset and superset test

    def issubset(self, other):
        """Report whether another set contains this set."""
        self._binary_sanity_check(other)
        if len(self) > len(other):  # Fast check for obvious cases
            return False
        # The first element of self that is missing from other settles it.
        for elt in ifilterfalse(other._data.__contains__, self):
            return False
        return True

    def issuperset(self, other):
        """Report whether this set contains another set."""
        self._binary_sanity_check(other)
        if len(self) < len(other):  # Fast check for obvious cases
            return False
        # The first element of other that is missing from self settles it.
        for elt in ifilterfalse(self._data.__contains__, other):
            return False
        return True

    # Inequality comparisons using the is-subset relation.
    __le__ = issubset
    __ge__ = issuperset

    def __lt__(self, other):
        """Report whether this set is a proper subset of another set."""
        self._binary_sanity_check(other)
        return len(self) < len(other) and self.issubset(other)

    def __gt__(self, other):
        """Report whether this set is a proper superset of another set."""
        self._binary_sanity_check(other)
        return len(self) > len(other) and self.issuperset(other)

    # Assorted helpers

    def _binary_sanity_check(self, other):
        # Check that the other argument to a binary operation is also
        # a set, raising a TypeError otherwise.
        if not isinstance(other, BaseSet):
            raise TypeError("Binary operation only permitted between sets")

    def _compute_hash(self):
        # Calculate hash code for a set by xor'ing the hash codes of
        # the elements.  This ensures that the hash code does not depend
        # on the order in which elements are added to the set.  This is
        # not called __hash__ because a BaseSet should not be hashable;
        # only an ImmutableSet is hashable.
        result = 0
        for elt in self:
            result ^= hash(elt)
        return result

    def _update(self, iterable):
        # The main loop for update() and the subclass __init__() methods.
        data = self._data

        # Use the fast update() method when a dictionary is available.
        if isinstance(iterable, BaseSet):
            data.update(iterable._data)
            return

        value = True

        if type(iterable) in (list, tuple, xrange):
            # Optimized: we know that __iter__() and next() can't
            # raise TypeError, so we can move 'try:' out of the loop.
            it = iter(iterable)
            while True:
                try:
                    for element in it:
                        data[element] = value
                    return
                except TypeError:
                    # 'element' is the item that could not be stored;
                    # store its immutable stand-in, then resume the same
                    # iterator on the next pass of the while loop.
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise # re-raise the TypeError exception we caught
                    data[transform()] = value
        else:
            # Safe: only catch TypeError where intended
            for element in iterable:
                try:
                    data[element] = value
                except TypeError:
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise # re-raise the TypeError exception we caught
                    data[transform()] = value
class ImmutableSet(BaseSet):
    """Immutable set class.

    Adds hashing to BaseSet.  The hash is computed on first use and
    then kept in self._hashcode.
    """

    __slots__ = ['_hashcode']

    # BaseSet + hashing

    def __init__(self, iterable=None):
        """Construct an immutable set from an optional iterable."""
        self._hashcode = None   # not computed yet
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __hash__(self):
        """Return the hash of the set, computing it on the first call."""
        cached = self._hashcode
        if cached is None:
            cached = self._hashcode = self._compute_hash()
        return cached

    def __getstate__(self):
        """Return the pickle state: the (data dict, cached hash) pair."""
        return (self._data, self._hashcode)

    def __setstate__(self, state):
        """Restore the (data dict, cached hash) pair from __getstate__()."""
        data, hashcode = state
        self._data = data
        self._hashcode = hashcode
class Set(BaseSet):
    """Mutable set class.

    Adds the in-place operators, the *_update() methods and the
    single-element mutators to BaseSet.  A Set is not hashable.
    """

    __slots__ = []

    # BaseSet + operations requiring mutability; no hashing

    def __init__(self, iterable=None):
        """Construct a set from an optional iterable."""
        self._data = {}
        if iterable is not None:
            self._update(iterable)

    def __getstate__(self):
        # pickle does not call __setstate__() when __getstate__() returns
        # a false value, so wrap the dict in a 1-tuple: that is true even
        # when the set is empty.
        return self._data,

    def __setstate__(self, data):
        # Unwrap the 1-tuple produced by __getstate__().
        self._data, = data

    def __hash__(self):
        """A Set cannot be hashed."""
        # We inherit object.__hash__, so we must deny this explicitly
        raise TypeError("Can't hash a Set, only an ImmutableSet.")

    # In-place union, intersection, differences.
    # Subtle:  The xyz_update() functions deliberately return None,
    # as do all mutating operations on built-in container types.
    # The __xyz__ spellings have to return self, though.

    def __ior__(self, other):
        """Update a set with the union of itself and another."""
        self._binary_sanity_check(other)
        self._data.update(other._data)
        return self

    def union_update(self, other):
        """Update a set with the union of itself and another."""
        self |= other

    def __iand__(self, other):
        """Update a set with the intersection of itself and another."""
        self._binary_sanity_check(other)
        self._data = (self & other)._data
        return self

    def intersection_update(self, other):
        """Update a set with the intersection of itself and another."""
        self &= other

    def __ixor__(self, other):
        """Update a set with the symmetric difference of itself and another."""
        self._binary_sanity_check(other)
        data = self._data
        if other._data is data:
            # other is this very set (or a wrapper sharing its dict).
            # Mutating the dict while iterating over it would raise
            # RuntimeError; the symmetric difference with itself is empty.
            data.clear()
            return self
        value = True
        for elt in other:
            if elt in data:
                del data[elt]
            else:
                data[elt] = value
        return self

    def symmetric_difference_update(self, other):
        """Update a set with the symmetric difference of itself and another."""
        self ^= other

    def __isub__(self, other):
        """Remove all elements of another set from this set."""
        self._binary_sanity_check(other)
        data = self._data
        if other._data is data:
            # other is this very set (or a wrapper sharing its dict).
            # Deleting from the dict while iterating over it would raise
            # RuntimeError; removing a set from itself leaves nothing.
            data.clear()
            return self
        for elt in ifilter(data.__contains__, other):
            del data[elt]
        return self

    def difference_update(self, other):
        """Remove all elements of another set from this set."""
        self -= other

    # Python dict-like mass mutations: update, clear

    def update(self, iterable):
        """Add all values from an iterable (such as a list or file)."""
        self._update(iterable)

    def clear(self):
        """Remove all elements from this set."""
        self._data.clear()

    # Single-element mutations: add, remove, discard

    def add(self, element):
        """Add an element to a set.

        This has no effect if the element is already present.
        """
        try:
            self._data[element] = True
        except TypeError:
            # Not usable as a dict key: store its immutable stand-in,
            # if it offers one.
            transform = getattr(element, "__as_immutable__", None)
            if transform is None:
                raise # re-raise the TypeError exception we caught
            self._data[transform()] = True

    def remove(self, element):
        """Remove an element from a set; it must be a member.

        If the element is not a member, raise a KeyError.
        """
        try:
            del self._data[element]
        except TypeError:
            # Not usable as a dict key: look it up through its temporarily
            # immutable stand-in, if it offers one.
            transform = getattr(element, "__as_temporarily_immutable__", None)
            if transform is None:
                raise # re-raise the TypeError exception we caught
            del self._data[transform()]

    def discard(self, element):
        """Remove an element from a set if it is a member.

        If the element is not a member, do nothing.
        """
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Remove and return an arbitrary set element."""
        return self._data.popitem()[0]

    def __as_immutable__(self):
        # Return a copy of self as an immutable set
        return ImmutableSet(self)

    def __as_temporarily_immutable__(self):
        # Return self wrapped in a temporarily immutable set
        return _TemporarilyImmutableSet(self)
class _TemporarilyImmutableSet(BaseSet):
    # Hashable stand-in for a mutable set.
    # Beyond what BaseSet provides, this adds only __hash__; equality
    # comparisons are the inherited ones.

    def __init__(self, set):
        # Keep the wrapped set so its hash can be computed on demand.
        self._set = set
        # Share (do not copy) the wrapped set's dict: the inherited
        # __eq__() and __ne__() compare _data.
        self._data = set._data

    def __hash__(self):
        # Recomputed from the wrapped set on every call; nothing is cached.
        wrapped = self._set
        return wrapped._compute_hash()