Message ID | 62e2b45a-6999-4b3f-a8a1-f01f434a9ed6@gmail.com |
---|---|
State | New |
Headers | show |
Series | [_Hashtable] Fix hash code cache usage | expand |
Ping for this bug fix, would you like a PR ? On 20/01/2025 22:12, François Dumont wrote: > Hi > > In my work on fancy pointer support I've decided to always cache the > hash code. > > Doing so I spotted a bug in the management of this cache when hash > functor is stateful. > > libstdc++: [_Hashtable] Fix hash code cache usage when hash > functor is stateful > > It is wrong to reuse a cached hash code when this code depends > then on the state > of the Hash functor. > > Add checks that Hash functor is stateless before reusing the > cached hash code. > > libstdc++-v3/ChangeLog: > > * include/bits/hashtable_policy.h > (_Hash_code_base::_M_copy_code): Remove. > * include/bits/hashtable.h (_M_copy_code): New. > (_M_assign): Use latter. > (_M_bucket_index_ex): New. > (_M_equals): Use latter. > * testsuite/23_containers/unordered_map/modifiers/merge.cc > (test10): New > test case. > > Tested under Linux x64, ok to commit ? > > François
On Mon, 17 Feb 2025 at 21:27, François Dumont <frs.dumont@gmail.com> wrote: > > Ping for this bug fix, would you like a PR ? No, I don't think we need one, I'm reviewing the patch now. > > On 20/01/2025 22:12, François Dumont wrote: > > Hi > > > > In my work on fancy pointer support I've decided to always cache the > > hash code. > > > > Doing so I spotted a bug in the management of this cache when hash > > functor is stateful. > > > > libstdc++: [_Hashtable] Fix hash code cache usage when hash > > functor is stateful > > > > It is wrong to reuse a cached hash code when this code depends > > then on the state > > of the Hash functor. > > > > Add checks that Hash functor is stateless before reusing the > > cached hash code. > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/hashtable_policy.h > > (_Hash_code_base::_M_copy_code): Remove. > > * include/bits/hashtable.h (_M_copy_code): New. > > (_M_assign): Use latter. > > (_M_bucket_index_ex): New. > > (_M_equals): Use latter. > > * testsuite/23_containers/unordered_map/modifiers/merge.cc > > (test10): New > > test case. > > > > Tested under Linux x64, ok to commit ? > > > > François >
On 20/01/25 22:12 +0100, François Dumont wrote: >Hi > >In my work on fancy pointer support I've decided to always cache the >hash code. > >Doing so I spotted a bug in the management of this cache when hash >functor is stateful. > > libstdc++: [_Hashtable] Fix hash code cache usage when hash >functor is stateful > > It is wrong to reuse a cached hash code when this code depends >then on the state > of the Hash functor. > > Add checks that Hash functor is stateless before reusing the >cached hash code. > > libstdc++-v3/ChangeLog: > > * include/bits/hashtable_policy.h >(_Hash_code_base::_M_copy_code): Remove. > * include/bits/hashtable.h (_M_copy_code): New. I like replacing the _M_copy_code overloads with one function, thanks. We could do the same for _M_store_code too. > (_M_assign): Use latter. > (_M_bucket_index_ex): New. I assume "ex" means "external"? That doesn't seem very clear to me though. Maybe "ext" would be better. > (_M_equals): Use latter. > * testsuite/23_containers/unordered_map/modifiers/merge.cc >(test10): New > test case. > >Tested under Linux x64, ok to commit ? > >François >diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h >index d6d76a743bb..b3c1d7aac24 100644 >--- a/libstdc++-v3/include/bits/hashtable.h >+++ b/libstdc++-v3/include/bits/hashtable.h >@@ -808,6 +808,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > _M_bucket_index(__hash_code __c) const > { return __hash_code_base::_M_bucket_index(__c, _M_bucket_count); } > >+#pragma GCC diagnostic push >+#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr >+ // Like _M_bucket_index but when the node is coming from another >+ // container instance. >+ size_type >+ _M_bucket_index_ex(const __node_value_type& __n) const >+ { >+ if constexpr (__hash_cached::value) >+ if constexpr (std::is_empty<_Hash>::value) >+ return _RangeHash{}(__n._M_hash_code, _M_bucket_count); >+ >+ return _RangeHash{} >+ (this->_M_hash_code(_ExtractKey{}(__n._M_v())), _M_bucket_count); >+ } >+ >+ void >+ _M_copy_code(__node_value_type& __to, >+ const __node_value_type& __from) const >+ { >+ if constexpr (__hash_cached::value) >+ { >+ if constexpr (std::is_empty<_Hash>::value) >+ __to._M_hash_code = __from._M_hash_code; >+ else >+ __to._M_hash_code = >+ this->_M_hash_code(_ExtractKey{}(__from._M_v())); >+ } >+ } These two functions are doing similar work, would it make sense to factor out the common part to a new function: // Get hash code for a node that comes from another _Hashtable. // Reuse a cached hash code if the hash function is stateless, // otherwise recalculate it using our own hash function. size_t _M_hash_code_ext(const __node_value_type& __from) const { if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) return __from._M_hash_code; else this->_M_hash_code(_ExtractKey{}(__from._M_v())); } // Like _M_bucket_index but when the node is coming from another // container instance. size_type _M_bucket_index_ext(const __node_value_type& __n) const { return _M_bucket_index(_M_hash_code_ext(__n)); } void _M_copy_code(__node_value_type& __to, const __node_value_type& __from) const { if constexpr (__hash_cached::value) __to._M_hash_code = _M_hash_code_ext(__n); } >+#pragma GCC diagnostic pop >+ > // Find and insert helper functions and types > > // Find the node before the one matching the criteria. >@@ -1587,7 +1617,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > __node_ptr __ht_n = __ht._M_begin(); > __node_ptr __this_n > = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); >- this->_M_copy_code(*__this_n, *__ht_n); >+ _M_copy_code(*__this_n, *__ht_n); > _M_update_bbegin(__this_n); > > // Then deal with other nodes. >@@ -1596,7 +1626,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > { > __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); > __prev_n->_M_nxt = __this_n; >- this->_M_copy_code(*__this_n, *__ht_n); >+ _M_copy_code(*__this_n, *__ht_n); > size_type __bkt = _M_bucket_index(*__this_n); > if (!_M_buckets[__bkt]) > _M_buckets[__bkt] = __prev_n; >@@ -2851,7 +2881,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > if constexpr (__unique_keys::value) > for (auto __x_n = _M_begin(); __x_n; __x_n = __x_n->_M_next()) > { >- std::size_t __ybkt = __other._M_bucket_index(*__x_n); >+ std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); > auto __prev_n = __other._M_buckets[__ybkt]; > if (!__prev_n) > return false; >@@ -2878,7 +2908,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > __x_n_end = __x_n_end->_M_next()) > ++__x_count; > >- std::size_t __ybkt = __other._M_bucket_index(*__x_n); >+ std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); > auto __y_prev_n = __other._M_buckets[__ybkt]; > if (!__y_prev_n) > return false; >diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h >index 1fa8c01d5e8..61c57651c4a 100644 >--- a/libstdc++-v3/include/bits/hashtable_policy.h >+++ b/libstdc++-v3/include/bits/hashtable_policy.h >@@ -1108,19 +1108,9 @@ namespace __detail > _M_store_code(_Hash_node_code_cache<false>&, __hash_code) const > { } > >- void >- _M_copy_code(_Hash_node_code_cache<false>&, >- const _Hash_node_code_cache<false>&) const >- { } >- > void > _M_store_code(_Hash_node_code_cache<true>& __n, __hash_code __c) const > { __n._M_hash_code = __c; } >- >- void >- _M_copy_code(_Hash_node_code_cache<true>& __to, >- const _Hash_node_code_cache<true>& __from) const >- { __to._M_hash_code = __from._M_hash_code; } > }; > > /// Partial specialization used when nodes contain a cached hash code. >diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc >index 10b61464243..010181f7038 100644 >--- a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc >+++ b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc >@@ -417,6 +417,51 @@ test09() > VERIFY( c2.size() == 3 ); > } > >+struct slow_stateful_hash >+{ >+ size_t seed = 0; >+ >+ auto operator()(const int& i) const noexcept >+ { return std::hash<int>()(i) + seed; } >+}; >+ >+namespace std >+{ >+ template<> >+ struct __is_fast_hash<slow_stateful_hash> : public std::false_type >+ { }; >+} >+ >+void >+test10() >+{ >+ using map_type = std::unordered_map<int, int, slow_stateful_hash>; >+ map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, slow_stateful_hash{1}); >+ map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, slow_stateful_hash{2}); >+ const auto c3 = c2; >+ >+ c1.merge(c2); >+ VERIFY( c1.size() == 6 ); >+ VERIFY( c2.empty() ); >+ >+ c2 = c3; >+ c1.clear(); >+ c1.merge(std::move(c2)); >+ VERIFY( c1 == c3 ); >+ VERIFY( c2.empty() ); >+ >+ c2.merge(std::move(c1)); >+ VERIFY( c1.empty() ); >+ VERIFY( c2 == c3 ); >+ >+ c2.merge(c1); >+ VERIFY( c1.empty() ); >+ VERIFY( c2 == c3 ); >+ >+ c2.merge(c2); >+ VERIFY( c2 == c3 ); >+} >+ > int > main() > { >@@ -429,4 +474,5 @@ main() > test07(); > test08(); > test09(); >+ test10(); > }
All good remarks as usual, here is a new version. I took the time to leverage on the new method to replace a usage of _M_src_hash_code. libstdc++: [_Hashtable] Fix hash code cache usage when stateful hash functor It is wrong to reuse a cached hash code from another container when this code depends on the state of the container's Hash functor. Add checks that Hash functor is stateless before reusing the cached hash code. libstdc++-v3/ChangeLog: * include/bits/hashtable_policy.h (_Hash_code_base::_M_copy_code, _Hash_code_base::_M_stored_code): Remove. * include/bits/hashtable.h (_M_hash_code_ext): New. (_M_copy_code): New. (_M_assign): Use latter. (_M_bucket_index_ex): New. (_M_equals): Use latter. (_M_store_code): New. * testsuite/23_containers/unordered_map/modifiers/merge.cc (test10): New test case. Tested under Linux x64. Ok to commit ? François On 19/02/2025 14:33, Jonathan Wakely wrote: > On 20/01/25 22:12 +0100, François Dumont wrote: >> Hi >> >> In my work on fancy pointer support I've decided to always cache the >> hash code. >> >> Doing so I spotted a bug in the management of this cache when hash >> functor is stateful. >> >> libstdc++: [_Hashtable] Fix hash code cache usage when hash >> functor is stateful >> >> It is wrong to reuse a cached hash code when this code depends >> then on the state >> of the Hash functor. >> >> Add checks that Hash functor is stateless before reusing the >> cached hash code. >> >> libstdc++-v3/ChangeLog: >> >> * include/bits/hashtable_policy.h >> (_Hash_code_base::_M_copy_code): Remove. >> * include/bits/hashtable.h (_M_copy_code): New. > > I like replacing the _M_copy_code overloads with one function, thanks. > We could do the same for _M_store_code too. > >> (_M_assign): Use latter. >> (_M_bucket_index_ex): New. > > I assume "ex" means "external"? That doesn't seem very clear to me > though. Maybe "ext" would be better. > >> (_M_equals): Use latter. >> * >> testsuite/23_containers/unordered_map/modifiers/merge.cc (test10): New >> test case. >> >> Tested under Linux x64, ok to commit ? >> >> François > >> diff --git a/libstdc++-v3/include/bits/hashtable.h >> b/libstdc++-v3/include/bits/hashtable.h >> index d6d76a743bb..b3c1d7aac24 100644 >> --- a/libstdc++-v3/include/bits/hashtable.h >> +++ b/libstdc++-v3/include/bits/hashtable.h >> @@ -808,6 +808,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION >> _M_bucket_index(__hash_code __c) const >> { return __hash_code_base::_M_bucket_index(__c, >> _M_bucket_count); } >> >> +#pragma GCC diagnostic push >> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr >> + // Like _M_bucket_index but when the node is coming from another >> + // container instance. >> + size_type >> + _M_bucket_index_ex(const __node_value_type& __n) const >> + { >> + if constexpr (__hash_cached::value) >> + if constexpr (std::is_empty<_Hash>::value) >> + return _RangeHash{}(__n._M_hash_code, _M_bucket_count); >> + >> + return _RangeHash{} >> + (this->_M_hash_code(_ExtractKey{}(__n._M_v())), _M_bucket_count); >> + } >> + >> + void >> + _M_copy_code(__node_value_type& __to, >> + const __node_value_type& __from) const >> + { >> + if constexpr (__hash_cached::value) >> + { >> + if constexpr (std::is_empty<_Hash>::value) >> + __to._M_hash_code = __from._M_hash_code; >> + else >> + __to._M_hash_code = >> + this->_M_hash_code(_ExtractKey{}(__from._M_v())); >> + } >> + } > > > These two functions are doing similar work, would it make sense to > factor out the common part to a new function: > > // Get hash code for a node that comes from another _Hashtable. > // Reuse a cached hash code if the hash function is stateless, > // otherwise recalculate it using our own hash function. > size_t > _M_hash_code_ext(const __node_value_type& __from) const > { > if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > return __from._M_hash_code; > else > this->_M_hash_code(_ExtractKey{}(__from._M_v())); > } > > // Like _M_bucket_index but when the node is coming from another > // container instance. > size_type > _M_bucket_index_ext(const __node_value_type& __n) const > { return _M_bucket_index(_M_hash_code_ext(__n)); } > > void > _M_copy_code(__node_value_type& __to, > const __node_value_type& __from) const > { > if constexpr (__hash_cached::value) > __to._M_hash_code = _M_hash_code_ext(__n); > } > > >> +#pragma GCC diagnostic pop >> + >> // Find and insert helper functions and types >> >> // Find the node before the one matching the criteria. >> @@ -1587,7 +1617,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION >> __node_ptr __ht_n = __ht._M_begin(); >> __node_ptr __this_n >> = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); >> - this->_M_copy_code(*__this_n, *__ht_n); >> + _M_copy_code(*__this_n, *__ht_n); >> _M_update_bbegin(__this_n); >> >> // Then deal with other nodes. >> @@ -1596,7 +1626,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION >> { >> __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); >> __prev_n->_M_nxt = __this_n; >> - this->_M_copy_code(*__this_n, *__ht_n); >> + _M_copy_code(*__this_n, *__ht_n); >> size_type __bkt = _M_bucket_index(*__this_n); >> if (!_M_buckets[__bkt]) >> _M_buckets[__bkt] = __prev_n; >> @@ -2851,7 +2881,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION >> if constexpr (__unique_keys::value) >> for (auto __x_n = _M_begin(); __x_n; __x_n = __x_n->_M_next()) >> { >> - std::size_t __ybkt = __other._M_bucket_index(*__x_n); >> + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); >> auto __prev_n = __other._M_buckets[__ybkt]; >> if (!__prev_n) >> return false; >> @@ -2878,7 +2908,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION >> __x_n_end = __x_n_end->_M_next()) >> ++__x_count; >> >> - std::size_t __ybkt = __other._M_bucket_index(*__x_n); >> + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); >> auto __y_prev_n = __other._M_buckets[__ybkt]; >> if (!__y_prev_n) >> return false; >> diff --git a/libstdc++-v3/include/bits/hashtable_policy.h >> b/libstdc++-v3/include/bits/hashtable_policy.h >> index 1fa8c01d5e8..61c57651c4a 100644 >> --- a/libstdc++-v3/include/bits/hashtable_policy.h >> +++ b/libstdc++-v3/include/bits/hashtable_policy.h >> @@ -1108,19 +1108,9 @@ namespace __detail >> _M_store_code(_Hash_node_code_cache<false>&, __hash_code) const >> { } >> >> - void >> - _M_copy_code(_Hash_node_code_cache<false>&, >> - const _Hash_node_code_cache<false>&) const >> - { } >> - >> void >> _M_store_code(_Hash_node_code_cache<true>& __n, __hash_code >> __c) const >> { __n._M_hash_code = __c; } >> - >> - void >> - _M_copy_code(_Hash_node_code_cache<true>& __to, >> - const _Hash_node_code_cache<true>& __from) const >> - { __to._M_hash_code = __from._M_hash_code; } >> }; >> >> /// Partial specialization used when nodes contain a cached hash code. >> diff --git >> a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc >> b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc >> index 10b61464243..010181f7038 100644 >> --- >> a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc >> +++ >> b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc >> @@ -417,6 +417,51 @@ test09() >> VERIFY( c2.size() == 3 ); >> } >> >> +struct slow_stateful_hash >> +{ >> + size_t seed = 0; >> + >> + auto operator()(const int& i) const noexcept >> + { return std::hash<int>()(i) + seed; } >> +}; >> + >> +namespace std >> +{ >> + template<> >> + struct __is_fast_hash<slow_stateful_hash> : public std::false_type >> + { }; >> +} >> + >> +void >> +test10() >> +{ >> + using map_type = std::unordered_map<int, int, slow_stateful_hash>; >> + map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, slow_stateful_hash{1}); >> + map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, slow_stateful_hash{2}); >> + const auto c3 = c2; >> + >> + c1.merge(c2); >> + VERIFY( c1.size() == 6 ); >> + VERIFY( c2.empty() ); >> + >> + c2 = c3; >> + c1.clear(); >> + c1.merge(std::move(c2)); >> + VERIFY( c1 == c3 ); >> + VERIFY( c2.empty() ); >> + >> + c2.merge(std::move(c1)); >> + VERIFY( c1.empty() ); >> + VERIFY( c2 == c3 ); >> + >> + c2.merge(c1); >> + VERIFY( c1.empty() ); >> + VERIFY( c2 == c3 ); >> + >> + c2.merge(c2); >> + VERIFY( c2 == c3 ); >> +} >> + >> int >> main() >> { >> @@ -429,4 +474,5 @@ main() >> test07(); >> test08(); >> test09(); >> + test10(); >> } > diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index b17a314a11d..246e62afc6a 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -808,6 +808,42 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_bucket_index(__hash_code __c) const { return __hash_code_base::_M_bucket_index(__c, _M_bucket_count); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr + // Get hash code for a node that comes from another _Hashtable. + // Reuse a cached hash code if the hash function is stateless, + // otherwise recalculate it using our own hash function. + __hash_code + _M_hash_code_ext(const __node_value_type& __from) const + { + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) + return __from._M_hash_code; + else + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); + } + + // Like _M_bucket_index but when the node is coming from another + // container instance. + size_type + _M_bucket_index_ext(const __node_value_type& __from) const + { return _RangeHash{}(_M_hash_code_ext(__from), _M_bucket_count); } + + void + _M_copy_code(__node_value_type& __to, + const __node_value_type& __from) const + { + if constexpr (__hash_cached::value) + __to._M_hash_code = _M_hash_code_ext(__from); + } + + void + _M_store_code(__node_value_type& __to, __hash_code __code) const + { + if constexpr (__hash_cached::value) + __to._M_hash_code = __code; + } +#pragma GCC diagnostic pop + // Find and insert helper functions and types // Find the node before the one matching the criteria. @@ -1210,16 +1246,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // with a hash function that might not match this->hash_function(). template<typename _H2> __hash_code - _M_src_hash_code(const _H2&, const key_type& __k, - const __node_value_type& __src_n) const + _M_src_hash_code(const _H2&, const __node_value_type& __src_n) const { - if constexpr (__hash_cached::value) - if constexpr (std::is_same_v<_H2, _Hash>) - if constexpr (std::is_empty_v<_Hash>) - // If the node has a cached hash code, it's OK to use it. - return this->_M_hash_code(__src_n); - - return this->_M_hash_code(__k); + if constexpr (__and_<__hash_cached, + is_same<_H2, _Hash>, is_empty<_Hash>>::value) + // If the node has a cached hash code, it's OK to use it. + return __src_n._M_hash_code; + else + return this->_M_hash_code(_ExtractKey{}(__src_n._M_v())); } public: @@ -1328,9 +1362,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION do { const auto& __node = static_cast<__node_type&>(*__prev->_M_nxt); - const key_type& __k = _ExtractKey{}(__node._M_v()); - // Hash code from this->hash_function(): - auto __code = _M_src_hash_code(__src.hash_function(), __k, __node); + // Hash code from this: + auto __code = _M_hash_code_ext(__node); // Bucket index in __src, using code from __src.hash_function(): size_type __src_bkt = __src._M_bucket_index(__node); auto __nh = __src._M_extract_node(__src_bkt, __prev); @@ -1356,9 +1389,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;) { auto __pos = __i++; - const key_type& __k = _ExtractKey{}(*__pos); __hash_code __code - = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur); + = _M_src_hash_code(__src.hash_function(), *__pos._M_cur); auto __nh = __src.extract(__pos); __hint = _M_insert_multi_node(__hint, __code, __nh._M_ptr)._M_cur; __nh.release(); @@ -1588,7 +1620,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __node_ptr __ht_n = __ht._M_begin(); __node_ptr __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); - this->_M_copy_code(*__this_n, *__ht_n); + _M_copy_code(*__this_n, *__ht_n); _M_update_bbegin(__this_n); // Then deal with other nodes. @@ -1597,7 +1629,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); __prev_n->_M_nxt = __this_n; - this->_M_copy_code(*__this_n, *__ht_n); + _M_copy_code(*__this_n, *__ht_n); size_type __bkt = _M_bucket_index(*__this_n); if (!_M_buckets[__bkt]) _M_buckets[__bkt] = __prev_n; @@ -2438,7 +2470,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } __rehash_guard._M_guarded_obj = nullptr; - this->_M_store_code(*__node, __code); + _M_store_code(*__node, __code); // Always insert at the beginning of the bucket. _M_insert_bucket_begin(__bkt, __node); @@ -2465,7 +2497,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_rehash(__do_rehash.second, false_type{}); __rehash_guard._M_guarded_obj = nullptr; - this->_M_store_code(*__node, __code); + _M_store_code(*__node, __code); const key_type& __k = _ExtractKey{}(__node->_M_v()); size_type __bkt = _M_bucket_index(__code); @@ -2852,7 +2884,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if constexpr (__unique_keys::value) for (auto __x_n = _M_begin(); __x_n; __x_n = __x_n->_M_next()) { - std::size_t __ybkt = __other._M_bucket_index(*__x_n); + std::size_t __ybkt = __other._M_bucket_index_ext(*__x_n); auto __prev_n = __other._M_buckets[__ybkt]; if (!__prev_n) return false; @@ -2879,7 +2911,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __x_n_end = __x_n_end->_M_next()) ++__x_count; - std::size_t __ybkt = __other._M_bucket_index(*__x_n); + std::size_t __ybkt = __other._M_bucket_index_ext(*__x_n); auto __y_prev_n = __other._M_buckets[__ybkt]; if (!__y_prev_n) return false; diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 1fa8c01d5e8..a9567cc3c74 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -1103,24 +1103,6 @@ namespace __detail _M_bucket_index(const _Hash_node_value<_Value, true>& __n, size_t __bkt_count) const noexcept { return _RangeHash{}(__n._M_hash_code, __bkt_count); } - - void - _M_store_code(_Hash_node_code_cache<false>&, __hash_code) const - { } - - void - _M_copy_code(_Hash_node_code_cache<false>&, - const _Hash_node_code_cache<false>&) const - { } - - void - _M_store_code(_Hash_node_code_cache<true>& __n, __hash_code __c) const - { __n._M_hash_code = __c; } - - void - _M_copy_code(_Hash_node_code_cache<true>& __to, - const _Hash_node_code_cache<true>& __from) const - { __to._M_hash_code = __from._M_hash_code; } }; /// Partial specialization used when nodes contain a cached hash code. diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc index 10b61464243..010181f7038 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc @@ -417,6 +417,51 @@ test09() VERIFY( c2.size() == 3 ); } +struct slow_stateful_hash +{ + size_t seed = 0; + + auto operator()(const int& i) const noexcept + { return std::hash<int>()(i) + seed; } +}; + +namespace std +{ + template<> + struct __is_fast_hash<slow_stateful_hash> : public std::false_type + { }; +} + +void +test10() +{ + using map_type = std::unordered_map<int, int, slow_stateful_hash>; + map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, slow_stateful_hash{1}); + map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, slow_stateful_hash{2}); + const auto c3 = c2; + + c1.merge(c2); + VERIFY( c1.size() == 6 ); + VERIFY( c2.empty() ); + + c2 = c3; + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c3 ); + VERIFY( c2.empty() ); + + c2.merge(std::move(c1)); + VERIFY( c1.empty() ); + VERIFY( c2 == c3 ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2 == c3 ); + + c2.merge(c2); + VERIFY( c2 == c3 ); +} + int main() { @@ -429,4 +474,5 @@ main() test07(); test08(); test09(); + test10(); }
On Mon, 24 Feb 2025 at 20:52, François Dumont <frs.dumont@gmail.com> wrote: > > All good remarks as usual, here is a new version. > > I took the time to leverage on the new method to replace a usage of > _M_src_hash_code. > > libstdc++: [_Hashtable] Fix hash code cache usage when stateful > hash functor > > It is wrong to reuse a cached hash code from another container when > this code depends > on the state of the container's Hash functor. > > Add checks that Hash functor is stateless before reusing the cached > hash code. > > libstdc++-v3/ChangeLog: > > * include/bits/hashtable_policy.h > (_Hash_code_base::_M_copy_code, > _Hash_code_base::_M_stored_code): Remove. "_M_store_code" not "_M_stored_code". > * include/bits/hashtable.h (_M_hash_code_ext): New. > (_M_copy_code): New. > (_M_assign): Use latter. > (_M_bucket_index_ex): New. > (_M_equals): Use latter. > (_M_store_code): New. The changes to _M_src_hash_code and _M_merge_multi should be in the ChangeLog too. OK for trunk with those changes, thanks. > * testsuite/23_containers/unordered_map/modifiers/merge.cc > (test10): New > test case. > > Tested under Linux x64. > > Ok to commit ? > > François > > > On 19/02/2025 14:33, Jonathan Wakely wrote: > > On 20/01/25 22:12 +0100, François Dumont wrote: > >> Hi > >> > >> In my work on fancy pointer support I've decided to always cache the > >> hash code. > >> > >> Doing so I spotted a bug in the management of this cache when hash > >> functor is stateful. > >> > >> libstdc++: [_Hashtable] Fix hash code cache usage when hash > >> functor is stateful > >> > >> It is wrong to reuse a cached hash code when this code depends > >> then on the state > >> of the Hash functor. > >> > >> Add checks that Hash functor is stateless before reusing the > >> cached hash code. > >> > >> libstdc++-v3/ChangeLog: > >> > >> * include/bits/hashtable_policy.h > >> (_Hash_code_base::_M_copy_code): Remove. > >> * include/bits/hashtable.h (_M_copy_code): New. > > > > I like replacing the _M_copy_code overloads with one function, thanks. > > We could do the same for _M_store_code too. > > > >> (_M_assign): Use latter. > >> (_M_bucket_index_ex): New. > > > > I assume "ex" means "external"? That doesn't seem very clear to me > > though. Maybe "ext" would be better. > > > >> (_M_equals): Use latter. > >> * > >> testsuite/23_containers/unordered_map/modifiers/merge.cc (test10): New > >> test case. > >> > >> Tested under Linux x64, ok to commit ? > >> > >> François > > > >> diff --git a/libstdc++-v3/include/bits/hashtable.h > >> b/libstdc++-v3/include/bits/hashtable.h > >> index d6d76a743bb..b3c1d7aac24 100644 > >> --- a/libstdc++-v3/include/bits/hashtable.h > >> +++ b/libstdc++-v3/include/bits/hashtable.h > >> @@ -808,6 +808,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > >> _M_bucket_index(__hash_code __c) const > >> { return __hash_code_base::_M_bucket_index(__c, > >> _M_bucket_count); } > >> > >> +#pragma GCC diagnostic push > >> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr > >> + // Like _M_bucket_index but when the node is coming from another > >> + // container instance. > >> + size_type > >> + _M_bucket_index_ex(const __node_value_type& __n) const > >> + { > >> + if constexpr (__hash_cached::value) > >> + if constexpr (std::is_empty<_Hash>::value) > >> + return _RangeHash{}(__n._M_hash_code, _M_bucket_count); > >> + > >> + return _RangeHash{} > >> + (this->_M_hash_code(_ExtractKey{}(__n._M_v())), _M_bucket_count); > >> + } > >> + > >> + void > >> + _M_copy_code(__node_value_type& __to, > >> + const __node_value_type& __from) const > >> + { > >> + if constexpr (__hash_cached::value) > >> + { > >> + if constexpr (std::is_empty<_Hash>::value) > >> + __to._M_hash_code = __from._M_hash_code; > >> + else > >> + __to._M_hash_code = > >> + this->_M_hash_code(_ExtractKey{}(__from._M_v())); > >> + } > >> + } > > > > > > These two functions are doing similar work, would it make sense to > > factor out the common part to a new function: > > > > // Get hash code for a node that comes from another _Hashtable. > > // Reuse a cached hash code if the hash function is stateless, > > // otherwise recalculate it using our own hash function. > > size_t > > _M_hash_code_ext(const __node_value_type& __from) const > > { > > if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > > return __from._M_hash_code; > > else > > this->_M_hash_code(_ExtractKey{}(__from._M_v())); > > } > > > > // Like _M_bucket_index but when the node is coming from another > > // container instance. > > size_type > > _M_bucket_index_ext(const __node_value_type& __n) const > > { return _M_bucket_index(_M_hash_code_ext(__n)); } > > > > void > > _M_copy_code(__node_value_type& __to, > > const __node_value_type& __from) const > > { > > if constexpr (__hash_cached::value) > > __to._M_hash_code = _M_hash_code_ext(__n); > > } > > > > > >> +#pragma GCC diagnostic pop > >> + > >> // Find and insert helper functions and types > >> > >> // Find the node before the one matching the criteria. > >> @@ -1587,7 +1617,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > >> __node_ptr __ht_n = __ht._M_begin(); > >> __node_ptr __this_n > >> = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); > >> - this->_M_copy_code(*__this_n, *__ht_n); > >> + _M_copy_code(*__this_n, *__ht_n); > >> _M_update_bbegin(__this_n); > >> > >> // Then deal with other nodes. > >> @@ -1596,7 +1626,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > >> { > >> __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); > >> __prev_n->_M_nxt = __this_n; > >> - this->_M_copy_code(*__this_n, *__ht_n); > >> + _M_copy_code(*__this_n, *__ht_n); > >> size_type __bkt = _M_bucket_index(*__this_n); > >> if (!_M_buckets[__bkt]) > >> _M_buckets[__bkt] = __prev_n; > >> @@ -2851,7 +2881,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > >> if constexpr (__unique_keys::value) > >> for (auto __x_n = _M_begin(); __x_n; __x_n = __x_n->_M_next()) > >> { > >> - std::size_t __ybkt = __other._M_bucket_index(*__x_n); > >> + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); > >> auto __prev_n = __other._M_buckets[__ybkt]; > >> if (!__prev_n) > >> return false; > >> @@ -2878,7 +2908,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > >> __x_n_end = __x_n_end->_M_next()) > >> ++__x_count; > >> > >> - std::size_t __ybkt = __other._M_bucket_index(*__x_n); > >> + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); > >> auto __y_prev_n = __other._M_buckets[__ybkt]; > >> if (!__y_prev_n) > >> return false; > >> diff --git a/libstdc++-v3/include/bits/hashtable_policy.h > >> b/libstdc++-v3/include/bits/hashtable_policy.h > >> index 1fa8c01d5e8..61c57651c4a 100644 > >> --- a/libstdc++-v3/include/bits/hashtable_policy.h > >> +++ b/libstdc++-v3/include/bits/hashtable_policy.h > >> @@ -1108,19 +1108,9 @@ namespace __detail > >> _M_store_code(_Hash_node_code_cache<false>&, __hash_code) const > >> { } > >> > >> - void > >> - _M_copy_code(_Hash_node_code_cache<false>&, > >> - const _Hash_node_code_cache<false>&) const > >> - { } > >> - > >> void > >> _M_store_code(_Hash_node_code_cache<true>& __n, __hash_code > >> __c) const > >> { __n._M_hash_code = __c; } > >> - > >> - void > >> - _M_copy_code(_Hash_node_code_cache<true>& __to, > >> - const _Hash_node_code_cache<true>& __from) const > >> - { __to._M_hash_code = __from._M_hash_code; } > >> }; > >> > >> /// Partial specialization used when nodes contain a cached hash code. > >> diff --git > >> a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc > >> b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc > >> index 10b61464243..010181f7038 100644 > >> --- > >> a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc > >> +++ > >> b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc > >> @@ -417,6 +417,51 @@ test09() > >> VERIFY( c2.size() == 3 ); > >> } > >> > >> +struct slow_stateful_hash > >> +{ > >> + size_t seed = 0; > >> + > >> + auto operator()(const int& i) const noexcept > >> + { return std::hash<int>()(i) + seed; } > >> +}; > >> + > >> +namespace std > >> +{ > >> + template<> > >> + struct __is_fast_hash<slow_stateful_hash> : public std::false_type > >> + { }; > >> +} > >> + > >> +void > >> +test10() > >> +{ > >> + using map_type = std::unordered_map<int, int, slow_stateful_hash>; > >> + map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, slow_stateful_hash{1}); > >> + map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, slow_stateful_hash{2}); > >> + const auto c3 = c2; > >> + > >> + c1.merge(c2); > >> + VERIFY( c1.size() == 6 ); > >> + VERIFY( c2.empty() ); > >> + > >> + c2 = c3; > >> + c1.clear(); > >> + c1.merge(std::move(c2)); > >> + VERIFY( c1 == c3 ); > >> + VERIFY( c2.empty() ); > >> + > >> + c2.merge(std::move(c1)); > >> + VERIFY( c1.empty() ); > >> + VERIFY( c2 == c3 ); > >> + > >> + c2.merge(c1); > >> + VERIFY( c1.empty() ); > >> + VERIFY( c2 == c3 ); > >> + > >> + c2.merge(c2); > >> + VERIFY( c2 == c3 ); > >> +} > >> + > >> int > >> main() > >> { > >> @@ -429,4 +474,5 @@ main() > >> test07(); > >> test08(); > >> test09(); > >> + test10(); > >> } > >
* François Dumont: > + // Get hash code for a node that comes from another _Hashtable. > + // Reuse a cached hash code if the hash function is stateless, > + // otherwise recalculate it using our own hash function. > + __hash_code > + _M_hash_code_ext(const __node_value_type& __from) const > + { > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > + return __from._M_hash_code; > + else > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); > + } Does C++ support stateful hash functions? I don't think so, and I don't see it documented as a GNU extension, either. Thanks, Florian
On Thu, 13 Mar 2025 at 06:50, Florian Weimer <fweimer@redhat.com> wrote: > > * François Dumont: > > > + // Get hash code for a node that comes from another _Hashtable. > > + // Reuse a cached hash code if the hash function is stateless, > > + // otherwise recalculate it using our own hash function. > > + __hash_code > > + _M_hash_code_ext(const __node_value_type& __from) const > > + { > > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > > + return __from._M_hash_code; > > + else > > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); > > + } > > Does C++ support stateful hash functions? I don't think so, and I don't > see it documented as a GNU extension, either. It does, yes. That's why the hash function isn't required to be default constructible, and has to be stored in the container and why doing swap on two containers has to swap the hash functions as well.
* Jonathan Wakely: > On Thu, 13 Mar 2025 at 06:50, Florian Weimer <fweimer@redhat.com> wrote: >> >> * François Dumont: >> >> > + // Get hash code for a node that comes from another _Hashtable. >> > + // Reuse a cached hash code if the hash function is stateless, >> > + // otherwise recalculate it using our own hash function. >> > + __hash_code >> > + _M_hash_code_ext(const __node_value_type& __from) const >> > + { >> > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) >> > + return __from._M_hash_code; >> > + else >> > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); >> > + } >> >> Does C++ support stateful hash functions? I don't think so, and I don't >> see it documented as a GNU extension, either. > > It does, yes. That's why the hash function isn't required to be > default constructible, and has to be stored in the container and why > doing swap on two containers has to swap the hash functions as well. Interesting. I have trouble reconciling this with the Cpp17Hash requirement that “The value” h(k) “shall depend only on the argument k for the duration of the program.” Thanks, Florian
On Thu, 13 Mar 2025 at 09:24, Florian Weimer <fweimer@redhat.com> wrote: > > * Jonathan Wakely: > > > On Thu, 13 Mar 2025 at 06:50, Florian Weimer <fweimer@redhat.com> wrote: > >> > >> * François Dumont: > >> > >> > + // Get hash code for a node that comes from another _Hashtable. > >> > + // Reuse a cached hash code if the hash function is stateless, > >> > + // otherwise recalculate it using our own hash function. > >> > + __hash_code > >> > + _M_hash_code_ext(const __node_value_type& __from) const > >> > + { > >> > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > >> > + return __from._M_hash_code; > >> > + else > >> > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); > >> > + } > >> > >> Does C++ support stateful hash functions? I don't think so, and I don't > >> see it documented as a GNU extension, either. > > > > It does, yes. That's why the hash function isn't required to be > > default constructible, and has to be stored in the container and why > > doing swap on two containers has to swap the hash functions as well. > > Interesting. I have trouble reconciling this with the Cpp17Hash > requirement that “The value” h(k) “shall depend only on the argument k > for the duration of the program.” That's for a given value of the type, h. For any two values of the type h and h2, it's not required that h2(k) == h(k).
On Thu, 13 Mar 2025 at 09:54, Jonathan Wakely <jwakely.gcc@gmail.com> wrote: > > On Thu, 13 Mar 2025 at 09:24, Florian Weimer <fweimer@redhat.com> wrote: > > > > * Jonathan Wakely: > > > > > On Thu, 13 Mar 2025 at 06:50, Florian Weimer <fweimer@redhat.com> wrote: > > >> > > >> * François Dumont: > > >> > > >> > + // Get hash code for a node that comes from another _Hashtable. > > >> > + // Reuse a cached hash code if the hash function is stateless, > > >> > + // otherwise recalculate it using our own hash function. > > >> > + __hash_code > > >> > + _M_hash_code_ext(const __node_value_type& __from) const > > >> > + { > > >> > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > > >> > + return __from._M_hash_code; > > >> > + else > > >> > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); > > >> > + } > > >> > > >> Does C++ support stateful hash functions? I don't think so, and I don't > > >> see it documented as a GNU extension, either. > > > > > > It does, yes. That's why the hash function isn't required to be > > > default constructible, and has to be stored in the container and why > > > doing swap on two containers has to swap the hash functions as well. > > > > Interesting. I have trouble reconciling this with the Cpp17Hash > > requirement that “The value” h(k) “shall depend only on the argument k > > for the duration of the program.” > > > That's for a given value of the type, h. For any two values of the > type h and h2, it's not required that h2(k) == h(k). In the original SGI STL, hash functions were required to be stateless: https://www.boost.org/sgi/stl/HashFunction.html I don't see any such requirement in C++11 and later, which added std::hash.
On Thu, 13 Mar 2025 at 09:58, Jonathan Wakely <jwakely.gcc@gmail.com> wrote: > > On Thu, 13 Mar 2025 at 09:54, Jonathan Wakely <jwakely.gcc@gmail.com> wrote: > > > > On Thu, 13 Mar 2025 at 09:24, Florian Weimer <fweimer@redhat.com> wrote: > > > > > > * Jonathan Wakely: > > > > > > > On Thu, 13 Mar 2025 at 06:50, Florian Weimer <fweimer@redhat.com> wrote: > > > >> > > > >> * François Dumont: > > > >> > > > >> > + // Get hash code for a node that comes from another _Hashtable. > > > >> > + // Reuse a cached hash code if the hash function is stateless, > > > >> > + // otherwise recalculate it using our own hash function. > > > >> > + __hash_code > > > >> > + _M_hash_code_ext(const __node_value_type& __from) const > > > >> > + { > > > >> > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > > > >> > + return __from._M_hash_code; > > > >> > + else > > > >> > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); > > > >> > + } > > > >> > > > >> Does C++ support stateful hash functions? I don't think so, and I don't > > > >> see it documented as a GNU extension, either. > > > > > > > > It does, yes. That's why the hash function isn't required to be > > > > default constructible, and has to be stored in the container and why > > > > doing swap on two containers has to swap the hash functions as well. > > > > > > Interesting. I have trouble reconciling this with the Cpp17Hash > > > requirement that “The value” h(k) “shall depend only on the argument k > > > for the duration of the program.” > > > > > > That's for a given value of the type, h. For any two values of the > > type h and h2, it's not required that h2(k) == h(k). > > In the original SGI STL, hash functions were required to be stateless: > https://www.boost.org/sgi/stl/HashFunction.html > > I don't see any such requirement in C++11 and later, which added std::hash. All three of libstdc++, libc++ and MSVC STL support a stateful hash function and use the value passed to the constructor: https://godbolt.org/z/cqj4fG6jP
* Jonathan Wakely: > On Thu, 13 Mar 2025 at 09:24, Florian Weimer <fweimer@redhat.com> wrote: >> >> * Jonathan Wakely: >> >> > On Thu, 13 Mar 2025 at 06:50, Florian Weimer <fweimer@redhat.com> wrote: >> >> >> >> * François Dumont: >> >> >> >> > + // Get hash code for a node that comes from another _Hashtable. >> >> > + // Reuse a cached hash code if the hash function is stateless, >> >> > + // otherwise recalculate it using our own hash function. >> >> > + __hash_code >> >> > + _M_hash_code_ext(const __node_value_type& __from) const >> >> > + { >> >> > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) >> >> > + return __from._M_hash_code; >> >> > + else >> >> > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); >> >> > + } >> >> >> >> Does C++ support stateful hash functions? I don't think so, and I don't >> >> see it documented as a GNU extension, either. >> > >> > It does, yes. That's why the hash function isn't required to be >> > default constructible, and has to be stored in the container and why >> > doing swap on two containers has to swap the hash functions as well. >> >> Interesting. I have trouble reconciling this with the Cpp17Hash >> requirement that “The value” h(k) “shall depend only on the argument k >> for the duration of the program.” > > > That's for a given value of the type, h. For any two values of the > type h and h2, it's not required that h2(k) == h(k). But the wording for the comp object for ordered containers is very different and makes it clear that the instance matters. I still think this is more confusing than necessary. If there isn't some general rule that empty objects can be considered stateless, that should be added somewhere, too. 8-) Thanks, Florian
On Thu, 13 Mar 2025 at 11:51, Florian Weimer <fweimer@redhat.com> wrote: > > * Jonathan Wakely: > > > On Thu, 13 Mar 2025 at 09:24, Florian Weimer <fweimer@redhat.com> wrote: > >> > >> * Jonathan Wakely: > >> > >> > On Thu, 13 Mar 2025 at 06:50, Florian Weimer <fweimer@redhat.com> wrote: > >> >> > >> >> * François Dumont: > >> >> > >> >> > + // Get hash code for a node that comes from another _Hashtable. > >> >> > + // Reuse a cached hash code if the hash function is stateless, > >> >> > + // otherwise recalculate it using our own hash function. > >> >> > + __hash_code > >> >> > + _M_hash_code_ext(const __node_value_type& __from) const > >> >> > + { > >> >> > + if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value) > >> >> > + return __from._M_hash_code; > >> >> > + else > >> >> > + return this->_M_hash_code(_ExtractKey{}(__from._M_v())); > >> >> > + } > >> >> > >> >> Does C++ support stateful hash functions? I don't think so, and I don't > >> >> see it documented as a GNU extension, either. > >> > > >> > It does, yes. That's why the hash function isn't required to be > >> > default constructible, and has to be stored in the container and why > >> > doing swap on two containers has to swap the hash functions as well. > >> > >> Interesting. I have trouble reconciling this with the Cpp17Hash > >> requirement that “The value” h(k) “shall depend only on the argument k > >> for the duration of the program.” > > > > > > That's for a given value of the type, h. For any two values of the > > type h and h2, it's not required that h2(k) == h(k). > > But the wording for the comp object for ordered containers is very > different and makes it clear that the instance matters. > > I still think this is more confusing than necessary. If there isn't > some general rule that empty objects can be considered stateless, that > should be added somewhere, too. 8-) Could you submit an issue (or two)? https://cplusplus.github.io/LWG/lwg-active.html#submit_issue
* Jonathan Wakely: >> I still think this is more confusing than necessary. If there isn't >> some general rule that empty objects can be considered stateless, that >> should be added somewhere, too. 8-) > > Could you submit an issue (or two)? > https://cplusplus.github.io/LWG/lwg-active.html#submit_issue Do you still need this because there is already a PR? [hash.requirements] clarify that Cpp17Hash does not imply stateless <https://github.com/cplusplus/draft/pull/7733> Thanks, Florian
On Fri, 14 Mar 2025 at 08:55, Florian Weimer <fweimer@redhat.com> wrote: > > * Jonathan Wakely: > > >> I still think this is more confusing than necessary. If there isn't > >> some general rule that empty objects can be considered stateless, that > >> should be added somewhere, too. 8-) > > > > Could you submit an issue (or two)? > > https://cplusplus.github.io/LWG/lwg-active.html#submit_issue > > Do you still need this because there is already a PR? > > [hash.requirements] clarify that Cpp17Hash does not imply stateless > <https://github.com/cplusplus/draft/pull/7733> I think we do need an LWG issue instead of an editorial tweak, but I'll take care of it.
diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h index d6d76a743bb..b3c1d7aac24 100644 --- a/libstdc++-v3/include/bits/hashtable.h +++ b/libstdc++-v3/include/bits/hashtable.h @@ -808,6 +808,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_bucket_index(__hash_code __c) const { return __hash_code_base::_M_bucket_index(__c, _M_bucket_count); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr + // Like _M_bucket_index but when the node is coming from another + // container instance. + size_type + _M_bucket_index_ex(const __node_value_type& __n) const + { + if constexpr (__hash_cached::value) + if constexpr (std::is_empty<_Hash>::value) + return _RangeHash{}(__n._M_hash_code, _M_bucket_count); + + return _RangeHash{} + (this->_M_hash_code(_ExtractKey{}(__n._M_v())), _M_bucket_count); + } + + void + _M_copy_code(__node_value_type& __to, + const __node_value_type& __from) const + { + if constexpr (__hash_cached::value) + { + if constexpr (std::is_empty<_Hash>::value) + __to._M_hash_code = __from._M_hash_code; + else + __to._M_hash_code = + this->_M_hash_code(_ExtractKey{}(__from._M_v())); + } + } +#pragma GCC diagnostic pop + // Find and insert helper functions and types // Find the node before the one matching the criteria. @@ -1587,7 +1617,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __node_ptr __ht_n = __ht._M_begin(); __node_ptr __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); - this->_M_copy_code(*__this_n, *__ht_n); + _M_copy_code(*__this_n, *__ht_n); _M_update_bbegin(__this_n); // Then deal with other nodes. @@ -1596,7 +1626,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v())); __prev_n->_M_nxt = __this_n; - this->_M_copy_code(*__this_n, *__ht_n); + _M_copy_code(*__this_n, *__ht_n); size_type __bkt = _M_bucket_index(*__this_n); if (!_M_buckets[__bkt]) _M_buckets[__bkt] = __prev_n; @@ -2851,7 +2881,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if constexpr (__unique_keys::value) for (auto __x_n = _M_begin(); __x_n; __x_n = __x_n->_M_next()) { - std::size_t __ybkt = __other._M_bucket_index(*__x_n); + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); auto __prev_n = __other._M_buckets[__ybkt]; if (!__prev_n) return false; @@ -2878,7 +2908,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __x_n_end = __x_n_end->_M_next()) ++__x_count; - std::size_t __ybkt = __other._M_bucket_index(*__x_n); + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n); auto __y_prev_n = __other._M_buckets[__ybkt]; if (!__y_prev_n) return false; diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 1fa8c01d5e8..61c57651c4a 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -1108,19 +1108,9 @@ namespace __detail _M_store_code(_Hash_node_code_cache<false>&, __hash_code) const { } - void - _M_copy_code(_Hash_node_code_cache<false>&, - const _Hash_node_code_cache<false>&) const - { } - void _M_store_code(_Hash_node_code_cache<true>& __n, __hash_code __c) const { __n._M_hash_code = __c; } - - void - _M_copy_code(_Hash_node_code_cache<true>& __to, - const _Hash_node_code_cache<true>& __from) const - { __to._M_hash_code = __from._M_hash_code; } }; /// Partial specialization used when nodes contain a cached hash code. diff --git a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc index 10b61464243..010181f7038 100644 --- a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc +++ b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc @@ -417,6 +417,51 @@ test09() VERIFY( c2.size() == 3 ); } +struct slow_stateful_hash +{ + size_t seed = 0; + + auto operator()(const int& i) const noexcept + { return std::hash<int>()(i) + seed; } +}; + +namespace std +{ + template<> + struct __is_fast_hash<slow_stateful_hash> : public std::false_type + { }; +} + +void +test10() +{ + using map_type = std::unordered_map<int, int, slow_stateful_hash>; + map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, slow_stateful_hash{1}); + map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, slow_stateful_hash{2}); + const auto c3 = c2; + + c1.merge(c2); + VERIFY( c1.size() == 6 ); + VERIFY( c2.empty() ); + + c2 = c3; + c1.clear(); + c1.merge(std::move(c2)); + VERIFY( c1 == c3 ); + VERIFY( c2.empty() ); + + c2.merge(std::move(c1)); + VERIFY( c1.empty() ); + VERIFY( c2 == c3 ); + + c2.merge(c1); + VERIFY( c1.empty() ); + VERIFY( c2 == c3 ); + + c2.merge(c2); + VERIFY( c2 == c3 ); +} + int main() { @@ -429,4 +474,5 @@ main() test07(); test08(); test09(); + test10(); }