Message ID | fd621d0f-b217-97cc-3428-e3a709a4f259@gmail.com |
---|---|
State | New |
Headers | show |
Series | Enhance unordered container merge | expand |
On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++ <libstdc++@gcc.gnu.org> wrote: > > libstdc++: Unordered containers merge re-use hash code. > > When merging between 2 unordered containers with same hasher we can > re-use > the cached hash code if any. Instead of introducing the _ReuseOrComputeHash type, wouldn't it be simpler to just overload _M_hash_code? // Same hash function, use the cached hash code. __hash_code _M_hash_code(const _Hash&, const _Hash_node_value<_Value, true>& __n) const { return __n._M_hash_code; } // Compute hash code using a different hash function, _H2 template<typename _H2> __hash_code _M_hash_code(const _H2&, const _Hash_node_value<_Value, __cache_hash_code>& __n) const { return this->_M_hash_code(_ExtractKey{}(__n._M_v()); } The first overload is more specialized, so will be chosen when the first argument is the same type as _Hash and the cache_has_code boolean is true.
On 15/11/21 12:25 am, Jonathan Wakely wrote: > On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++ > <libstdc++@gcc.gnu.org> wrote: >> libstdc++: Unordered containers merge re-use hash code. >> >> When merging between 2 unordered containers with same hasher we can >> re-use >> the cached hash code if any. > Instead of introducing the _ReuseOrComputeHash type, wouldn't it be > simpler to just overload _M_hash_code? > > > // Same hash function, use the cached hash code. > __hash_code > _M_hash_code(const _Hash&, > const _Hash_node_value<_Value, true>& __n) const > { return __n._M_hash_code; } > > // Compute hash code using a different hash function, _H2 > template<typename _H2> > __hash_code > _M_hash_code(const _H2&, > const _Hash_node_value<_Value, __cache_hash_code>& __n) const > { return this->_M_hash_code(_ExtractKey{}(__n._M_v()); } > > The first overload is more specialized, so will be chosen when the > first argument is the same type as _Hash and the cache_has_code > boolean is true. Yes, it is simpler. Ok to commit ? François
On Mon, 15 Nov 2021 at 06:00, François Dumont wrote: > > On 15/11/21 12:25 am, Jonathan Wakely wrote: > > On Sun, 14 Nov 2021 at 13:31, François Dumont via Libstdc++ > > <libstdc++@gcc.gnu.org> wrote: > >> libstdc++: Unordered containers merge re-use hash code. > >> > >> When merging between 2 unordered containers with same hasher we can > >> re-use > >> the cached hash code if any. > > Instead of introducing the _ReuseOrComputeHash type, wouldn't it be > > simpler to just overload _M_hash_code? > > > > > > // Same hash function, use the cached hash code. > > __hash_code > > _M_hash_code(const _Hash&, > > const _Hash_node_value<_Value, true>& __n) const > > { return __n._M_hash_code; } > > > > // Compute hash code using a different hash function, _H2 > > template<typename _H2> > > __hash_code > > _M_hash_code(const _H2&, > > const _Hash_node_value<_Value, __cache_hash_code>& __n) const > > { return this->_M_hash_code(_ExtractKey{}(__n._M_v()); } > > > > The first overload is more specialized, so will be chosen when the > > first argument is the same type as _Hash and the cache_has_code > > boolean is true. > > Yes, it is simpler. > > Ok to commit ? Yes, thanks.
diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index 0e949d73614..6e2d4c10cfe 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -1076,7 +1076,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto __pos = __i++; const key_type& __k = _ExtractKey{}(*__pos); - __hash_code __code = this->_M_hash_code(__k); + __hash_code __code + = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); size_type __bkt = _M_bucket_index(__code); if (_M_find_node(__bkt, __k, __code) == nullptr) { @@ -1099,14 +1100,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION node_type>, "Node types are compatible"); __glibcxx_assert(get_allocator() == __src.get_allocator()); + __node_ptr __hint = nullptr; this->reserve(size() + __src.size()); for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;) { auto __pos = __i++; - const key_type& __k = _ExtractKey{}(*__pos); - __hash_code __code = this->_M_hash_code(__k); + __hash_code __code + = this->_M_hash_code(__src.hash_function(), *__pos._M_cur); auto __nh = __src.extract(__pos); - _M_insert_multi_node(nullptr, __code, __nh._M_ptr); + __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur; __nh._M_ptr = nullptr; } } diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index c0295b75963..95a1c45e634 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -1217,6 +1217,26 @@ namespace __detail friend struct _Local_iterator_base<_Key, _Value, _ExtractKey, _Hash, _RangeHash, _Unused, false>; + template<typename _H1, typename _H2, bool __with_cache> + struct _ReuseOrComputeHash + { + std::size_t + operator()(const _Hash_node_value<_Value, __with_cache>& __n) const + { return _M_hash_code_base._M_hash_code(_ExtractKey{}(__n._M_v())); } + + const _Hash_code_base& _M_hash_code_base; + }; + + template<typename _Hn> + struct _ReuseOrComputeHash<_Hn, _Hn, true> + { + _ReuseOrComputeHash(const _Hash_code_base&) { } + + std::size_t + operator()(const _Hash_node_value<_Value, true>& __n) const + { return __n._M_hash_code; } + }; + public: typedef _Hash hasher; @@ -1250,6 +1270,12 @@ namespace __detail return _M_hash()(__k); } + template<typename _H2> + __hash_code + _M_hash_code(const _H2&, + const _Hash_node_value<_Value, __cache_hash_code>& __n) const + { return _ReuseOrComputeHash<_Hash, _H2, __cache_hash_code>{ *this }(__n); } + std::size_t _M_bucket_index(__hash_code __c, std::size_t __bkt_count) const { return _RangeHash{}(__c, __bkt_count); } diff --git a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc index 1ed2ce234a1..07b8a344169 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_multiset/modifiers/merge.cc @@ -17,6 +17,7 @@ // { dg-do run { target c++17 } } +#include <string> #include <unordered_set> #include <algorithm> #include <testsuite_hooks.h> @@ -105,6 +106,26 @@ test04() VERIFY( c2.empty() ); } +void +test05() +{ + const std::unordered_multiset<std::string> c0{ "abcd", "abcd", "efgh", "efgh", "ijkl", "ijkl" }; + std::unordered_multiset<std::string> c1 = c0; + std::unordered_set<std::string> c2( c0.begin(), c0.end() ); + + c1.merge(c2); + VERIFY( c1.size() == (1.5 * c0.size()) ); + for (auto& i : c1) + VERIFY( c1.count(i) == (1.5 * c0.count(i)) ); + VERIFY( c2.empty() ); + + c1.clear(); + c2.insert( c0.begin(), c0.end() ); + c1.merge(std::move(c2)); + VERIFY( c1.size() == (0.5 * c0.size()) ); + VERIFY( c2.empty() ); +} + int main() { @@ -112,4 +133,5 @@ main() test02(); test03(); test04(); + test05(); } diff --git a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc index c9c8a60fd54..0e184b10c60 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_set/modifiers/merge.cc @@ -17,6 +17,7 @@ // { dg-do run { target c++17 } } +#include <string> #include <unordered_set> #include <algorithm> #include <testsuite_hooks.h> @@ -125,10 +126,52 @@ test03() VERIFY( c2.empty() ); } +void +test04() +{ + const std::unordered_set<std::string> c0{ "abcd", "efgh", "ijkl", }; + std::unordered_set<std::string> c1 = c0; + std::unordered_multiset<std::string> c2( c0.begin(), c0.end() ); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( equal_elements(c2, c0) ); + + c1 = c0; + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2.size() == (2 * c0.size()) ); + VERIFY( c2.count("abcd") == 2 ); + VERIFY( c2.count("efgh") == 2 ); + VERIFY( c2.count("ijkl") == 2 ); + + c1.merge(c2); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( equal_elements(c2, c0) ); + + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c0 ); + VERIFY( c2.empty() ); +} + int main() { test01(); test02(); test03(); + test04(); }