/** Set types for the D Programming Language (Tested under D 1.018, Phobos std library) The two main classes provided by this module are: o set!(value_type, comparator) o multiset!(value_type, comparator) 'set' is a standard ordered set that only allows one copy of each value. 'multiset' is a set that allows multiple copies of the same value (a.k.a. what is called a "Bag" in some circles) Author: Bill Baxter (wbaxter@gmail.com) License: zlib/libpng license: $(LICENSE) The implementation is based on a RedBlack tree, so sorted order is always maintained. That also gives it the following performance characteristics: insertions - O(lg N) removals - O(lg N) searching - O(lg N) sorting - O(1) sorted traverals - O(N) */ module sets; import redblacktree; import std.stdio : writefln; class KeyException : Exception { this(char[] msg) { super(msg); } } template _set_impl_mixin_public(T, Compare) { alias RedBlackTree!(T,Compare) tree_type; alias T value_type; alias T key_type; alias Compare value_compare; alias Compare key_compare; alias tree_type.node_type node_type; alias typeof(this) self_type; /// create an empty set this() { _tree = new tree_type(); } /// create a set based off of another set this(self_type other) { _tree = new tree_type(other._tree); } /// make a duplicate of this set, D-style self_type dup() { return new self_type(this); } /// print contents of the set with writefln char[] toString() { return "set("~_tree.toString()~")"; } /// Synonym for insert self_type opCatAssign( T value ) { insert(value); return this; } /// Synonym for merge self_type opCatAssign( self_type other ) { merge(other); return this; } /// removes the given value, /// If the element is not present, raises a KeyException void remove (T value) { if (!_tree.remove(value)) { throw new KeyException("no such element in set"); } } /// removes and returns the given value, /// If the element is not present, raises a KeyException T pop (T value) { node_type n = _tree.pop(value); if (!n) { throw new KeyException("no such element in set"); } return n.value; } /// removes and returns the minimum value, /// If the set is empty, raises a KeyException T pop_min() { node_type n = _tree.pop_min(); if (!n) { throw new KeyException("no such element in set"); } return n.value; } /// removes and returns the maximum value, /// If the set is empty, raises a KeyException T pop_max() { node_type n = _tree.pop_min(); if (!n) { throw new KeyException("no such element in set"); } return n.value; } /// Discards the given value from the set, /// Returns true if the value was removed, false otherwise. bool discard (T value) { return _tree.remove(value); } /// Search for a value in the set and return pointer to it if found, /// Return null otherwise. /// You may NOT change the value returned. Doing so will corrupt the /// set's internal data structure. T* find(T value) { node_type N = _tree.search(value); if (N is null) return null; return &N.value; } /// remove all values from the set void clear() { _tree.clear(); } /// return tree nodes size size_t size() { return _tree.size(); } /// return tree nodes size (for compatibility with D's Array concept) size_t length() { return _tree.size(); } /// true if empty bool empty() { return _tree.empty(); } /// merge values from other set into this one (union, but that's a keyword) void merge(self_type other) { _tree.merge(other._tree); } /// remove all values present in the other set from one void intersect(self_type other) { _tree.intersect(other._tree); } /// make this set a duplicate of another void duplicate(self_type other) { _tree.duplicate(other._tree); } /// Foreach iterator forward through the values /// This is guaranteed to iterate in sorted order. int opApply(int delegate(inout T) dg) { return _tree.opApply(dg); } /// Return whether value is in the set /// E.g. 'if (4 in int_set) { ... }' T* opIn_r(T value) { node_type n = _tree.opIn_r(value); if (n) { return &n.value; } else { return null; } } } template _set_impl_mixin_private(T,Compare) { RedBlackTree!(T,Compare) _tree; /// returns whether the set data structure is valid or not /// (just for internal debugging) int is_valid() { return _tree.is_valid(); } } //============================================================================ class set(T, Compare=_GenericLess!(T)) { mixin _set_impl_mixin_public!(T,Compare); /// insert value into the set /// Return true if it was actually added, /// or false if the item was already present. bool insert( T value ) { return _tree.insert(value); } private: mixin _set_impl_mixin_private!(T,Compare); } //============================================================================ /** The multiset class is set type that allows duplicates. Referred to as a 'bag' in some circles, I think. */ class multiset(T, Compare=_GenericLess!(T)) { mixin _set_impl_mixin_public!(T,Compare); /// Insert the value into the set. /// Multiset allows for multiple copies of the same value, so /// this will always succeed. void insert( T value ) { _tree.insert_unconditional(value); } private: mixin _set_impl_mixin_private!(T,Compare); } //============================================================================ /++ class hashset(T) { alias bool[T] table_type; alias T value_type; alias T key_type; //alias Compare value_compare; //alias Compare key_compare; alias typeof(this) self_type; /// create an empty set this() { _table = new table_type(); } /// create a set based off of another set this(self_type other) { _table = new tree_type(other._table); } /// make a duplicate of this set, D-style self_type dup() { return new self_type(this); } /// print contents of the set with writefln char[] toString() { return "set("~_table.toString()~")"; } /// Synonym for insert self_type opCatAssign( T value ) { insert(value); return this; } /// Synonym for merge self_type opCatAssign( self_type other ) { merge(other); return this; } /// Insert the value into the set bool insert( T value ) { return _table[value] = true; } /// removes the given value, /// If the element is not present, raises a KeyException void remove (T value) { try { _table.remove(value); } catch (ArrayBoundsError e) { throw new KeyException("no such element in hashset"); } } /// removes and returns the given value, /// If the element is not present, raises a KeyException T pop (T value) { if (! (T in _table)) { throw new KeyException("no such element in set"); } T ret = _table.pop(value); if (!n) { } return n.value; } /// removes and returns the minimum value, /// If the set is empty, raises a KeyException T pop_min() { node_type n = _table.pop_min(); if (!n) { throw new KeyException("no such element in set"); } return n.value; } /// removes and returns the maximum value, /// If the set is empty, raises a KeyException T pop_max() { node_type n = _table.pop_min(); if (!n) { throw new KeyException("no such element in set"); } return n.value; } /// Discards the given value from the set, /// Returns true if the value was removed, false otherwise. bool discard (T value) { return _table.remove(value); } /// Search for a value in the set and return pointer to it if found, /// Return null otherwise. /// You may NOT change the value returned. Doing so will corrupt the /// set's internal data structure. T* find(T value) { node_type N = _table.search(value); if (N is null) return null; return &N.value; } /// remove all values from the set void clear() { _table.clear(); } /// return tree nodes size int size() { return _table.size(); } /// true if empty bool empty() { return _table.empty(); } /// merge values from other set into this one (union, but that's a keyword) void merge(self_type other) { _table.merge(other._table); } /// remove all values present in the other set from one void intersect(self_type other) { _table.intersect(other._table); } /// make this set a duplicate of another void duplicate(self_type other) { _table.duplicate(other._table); } /// Foreach iterator forward through the values /// This is guaranteed to iterate in sorted order. int opApply(int delegate(inout T) dg) { return _table.opApply(dg); } /// Return whether value is in the set /// E.g. 'if (4 in int_set) { ... }' T* opIn_r(T value) { node_type n = _table.opIn_r(value); if (n) { return &n.value; } else { return null; } } private: HashTable _table; } ++/ //============================================================================ unittest{ { auto aset = new set!(int); static assert( is(int==aset.value_type) ); static assert( is(int==aset.key_type) ); alias typeof(aset) Set; for (int i = 0; i < 10; i++) { bool inserted = aset.insert(i); assert(inserted); } assert(aset.size==10); // Should ignore repeats for (int i = 0; i < 10; i++) { bool inserted = aset.insert(i); assert(!inserted); } assert(aset.size==10); // Test opIn assert(!(99 in aset)); assert(5 in aset); //aset.clear(); bool[int] aacheck; foreach(int v; aset) aacheck[v]=true; foreach(k,v; aacheck) { assert(k in aset); } foreach(int v; aset) { assert(v in aset); } foreach(aset.value_type v; aset) { assert(v in aset); } Set t2 = new Set(aset); // creates a copy assert(aacheck.length == t2.size); // Make sure contents are the same foreach(k,v; aacheck) { assert(k in t2); } foreach(int v; t2) { assert(v in t2); } foreach(aset.value_type v; t2) { assert(v in t2); } assert(aset.toString == "set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)"); for (int i=-10; i<20; i++) { int* n = aset.find(i); if (i>=0 && i<10) { assert (n !is null); } else { assert (n is null); } } // test bogus remove { bool caught = false; try { aset.remove(99); } catch (KeyException) { caught = true; } assert(caught); } // test bogus discard { bool ret = aset.discard(99); assert(ret==false); } // test remove & discard -- remove them all 1 by 1 int[] to_remove = [7,2,3,5,6,9,4,8,0,1]; bool[int] removed; foreach(r; to_remove) { assert(aset.is_valid); removed[r] = true; // Use discard on half, remove on other half if (r%2) { aset.remove(r); } else { bool ret = aset.discard(r); assert(ret); } for (int i=0; i<10; i++) { assert( (i in removed) || (i in aset) ); } } assert(aset.is_valid); } { // Test lexicographical ordered pairs with external comparator struct Pair { int L,R; static Pair opCall(int L, int R) { Pair ret; ret.L=L; ret.R=R; return ret; } char[] toString() { return string_format("(%d, %d)", L,R); } } struct PairLess { static bool opCall(ref Pair a, ref Pair b) { if (a.Lb.L) { return false; } if (a.R= 0; i--) { for (int j = 3; j >= 0; j--) { aset.insert(Pair(i,j)); aset.insert(Pair(j,i)); } } char[] str = aset.toString(); assert(str=="set((0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3), (3, 0), (3, 1), (3, 2), (3, 3))"); // Should be 16 distinct values, (though 32 were inserted) assert(aset.size==16); // Test opIn assert(!(Pair(1,9) in aset)); assert(!(Pair(9,1) in aset)); assert(Pair(3,2) in aset); //aset.clear(); bool[Pair] aacheck; foreach(v; aset) aacheck[v]=true; foreach(k,v; aacheck) { assert(k in aset); } foreach(v; aset) { assert(v in aset); } foreach(aset.value_type v; aset) { assert(v in aset); } Set t2 = new Set(aset); // creates a copy assert(aacheck.length == t2.size); // Make sure contents are the same foreach(k,v; aacheck) { assert(k in t2); } foreach(v; t2) { assert(v in t2); } foreach(aset.value_type v; t2) { assert(v in t2); } for (int i=-10; i<20; i++) { for (int j=-10; j<20; j++) { Pair* n = aset.find(Pair(i,j)); if (i>=0 && i<4 && j>=0 && j<4) { assert (n !is null); } else { assert (n is null); } } } // test remove -- remove them all 1 by 1 Pair[] to_remove = [ Pair(0, 0), Pair(0, 1), Pair(0, 2), Pair(0, 3), Pair(1, 0), Pair(1, 1), Pair(1, 2), Pair(1, 3), Pair(2, 0), Pair(2, 1), Pair(2, 2), Pair(2, 3), Pair(3, 0), Pair(3, 1), Pair(3, 2), Pair(3, 3), ]; bool[Pair] removed; foreach(r; to_remove) { assert(aset.is_valid); removed[r] = true; aset.remove(r); for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { assert( (Pair(i,j) in removed) || (Pair(i,j) in aset) ); } } } assert(aset.toString() == "set()"); assert(aset.is_valid); } { // Test merge and intersect auto t1 = new set!(int); auto t2 = new set!(int); t1 ~= 1; t1 ~= 9; t1 ~= 7; t2 ~= 2; t2 ~= 9; t2 ~= 8; t1 ~= t2; assert(t1.size == 5); assert(1 in t1); assert(9 in t1); assert(7 in t1); assert(2 in t1); assert(8 in t1); // T2 shouldn't be changed assert(t2.size == 3); assert(2 in t2); assert(9 in t2); assert(8 in t2); t1.intersect(t2); assert(t1.size == 2); assert(1 in t1); assert(7 in t1); // T2 still shouldn't be changed assert(t2.size == 3); assert(2 in t2); assert(9 in t2); assert(8 in t2); // make sure 'merge' ok too t1.merge(t2); assert(t1.size==5); t1.is_valid(); t2.is_valid(); } { // Test multi-set functionality auto intset = new multiset!(int); alias typeof(intset) Set; for (int i = 0; i < 10; i++) { assert(intset.size==i); intset.insert(i); } assert(intset.size==10); // Should not ignore repeats for (int i = 0; i < 10; i++) { intset.insert(i); } assert(intset.size==20); for (int i=0; i<10; i++) { intset.remove(i); } assert(intset.size==10); for (int i=0; i<10; i++) { intset.remove(i); } assert(intset.size==0); } }