Message ID | Z1W9EfQnjf4kR7zx@kam.mff.cuni.cz |
---|---|
State | New |
Headers | show |
Series | libstdc++: Optimize std::vector<bool> | expand |
On Sun, 8 Dec 2024 at 15:36, Jan Hubicka <hubicka@ucw.cz> wrote: > > Hi, > std::vector has independent implementation for bool which has its won > size/capacity functions. I updated them to add __builtin_unreachable to > announce that size is never more than max_size. However while testing the code > I noticed that even construction of unused copy is not optimized out. Main > problem is that the vector copying loops copies the tail of vector using loop > that copies bit by bit. We eventually pattern match it to bit operations (that > surprises me) but we need to unroll it and run through loop optimization that > happens late. > > This patch also updates copy_aglined to use bit operations. However for = operation > we can do better since it is not necessary to preserve original bits (those are > undefined anyway). So I added copy_aglined_trail (better name would be welcome) > that does not care about trailing bits of the copied block. > > As a result the following functions are optimized to empty functions: > #include <vector> > bool > empty(std::vector<bool> src) > { > std::vector <bool> data=src; > return false; > } > bool > empty2(std::vector<bool> src) > { > std::vector <bool> data; > data.reserve(1); > return data.size (); > } > bool > empty3() > { > std::vector <bool> data; > data.push_back (true); > return data[0]; > } > > Finally I mirrored changes to push_back from normal vectors to bit vectors. > This involve separating append from insert and breaking out the resizing (and > cold) path to separate function. > > Here are two little benchmarks on push_back: > > #include <vector> > > __attribute__ ((noipa)) > std::vector<bool> > test() > { > std::vector <bool> t; > t.push_back (1); > t.push_back (2); > return t; > } > int > main() > { > for (int i = 0; i < 100000000; i++) > { > test(); > } > return 0; > } > > runtime(s) .text size of a.out > gcc -O2 1.04 1606 > gcc -O2 + patch 0.98 1315 > gcc -O3 0.98 1249 > gcc -O3 + patch 0.96 1138 > gcc -O3 + patch --param max-inline-insns-auto=5000 0.96 1138 > clang -O2 1.56 1823 > clang -O3 1.56 1839 > clang -O2 + libc++ 2.31 4272 > clang -O3 + libc++ 2.76 4262 > > push_back is still too large to inline fully at -O3. If parameters are bumped up > for small vectors this makes it possible to propagate bit positions. > This variant of benchmark > > #include <vector> > > __attribute__ ((noipa)) > std::vector<bool> > test() > { > std::vector <bool> t; > t.push_back (1); > t.push_back (2); > return t; > } > int > main() > { > for (int i = 0; i < 100000000; i++) > { > test(); > } > return 0; > } > > > runtime(s) .text size of a.out > gcc -O2 1.48 1574 > gcc -O2 + patch 1.30 1177 > gcc -O3 1.46 1515 > gcc -O3 + patch 1.27 1069 > gcc -O3 + patch --param max-inline-insns-auto=5000 1.10 388 > clang -O2 1.48 1823 > clang -O3 1.46 1711 > clang -O2 + libc++ 1.20 1001 > clang -O3 + libc++ 1.17 1001 > > Note that clang does not suport noipa attribute, so it has some advantage here. > Bootstrapped/regtested x86_64-linux, OK? > > libstdc++-v3/ChangeLog: > > * include/bits/stl_bvector.h (vector<bool, _Alloc>::operator=): Use > _M_copy_aligned_trail. > (vector<bool, _Alloc>::copy_aglined_trail): New function. Spelling > (vector<bool, _Alloc>::copy_aglined): Implement copying of tail manually. Spelling > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::capacity): Add check > that size is at most max_size. > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::push_back): Use > _M_append_aux. > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_append_aux): Declare. > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_append_aux): Declare. > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_insert_aux): Declare. > * include/bits/vector.tcc > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_append_aux): Implement. > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_append_aux): Implement. > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_insert_aux): Break out from ... > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_insert_aux): ... here. > > gcc/testsuite/ChangeLog: > > * g++.dg/tree-ssa/bvector-1.C: New test. > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C > new file mode 100644 > index 00000000000..4197ff8c4b8 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C > @@ -0,0 +1,28 @@ > +// { dg-do compile } > +// { dg-options "-O2 -fdump-tree-optimized" } > +// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } } > +#include <vector> > +bool > +empty(std::vector<bool> src) > +{ > + std::vector <bool> data=src; > + return false; > +} > +bool > +empty2(std::vector<bool> src) > +{ > + std::vector <bool> data; > + data.reserve(1); > + return data.size (); > +} > +bool > +empty3() > +{ > + std::vector <bool> data; > + data.push_back (true); > + return data[0]; > +} > +// All references to src should be optimized out, so there should be no name for it > +// { dg-final { scan-tree-dump-not "src_" optimized } } > +// { dg-final { scan-tree-dump-not "data_" optimized } } > +// { dg-final { scan-tree-dump-not "new" optimized } } > diff --git a/libstdc++-v3/include/bits/stl_bvector.h b/libstdc++-v3/include/bits/stl_bvector.h > index 341eee33b21..ce1c5121b38 100644 > --- a/libstdc++-v3/include/bits/stl_bvector.h > +++ b/libstdc++-v3/include/bits/stl_bvector.h > @@ -946,8 +946,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > this->_M_deallocate(); > _M_initialize(__x.size()); > } > - this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(), > - begin()); > + this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), __x.end(), > + begin()); > return *this; > } > > @@ -971,8 +971,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > this->_M_deallocate(); > _M_initialize(__x.size()); > } > - this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(), > - begin()); > + this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), __x.end(), > + begin()); > __x.clear(); > } > return *this; > @@ -1101,7 +1101,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > size_type > size() const _GLIBCXX_NOEXCEPT > - { return size_type(end() - begin()); } > + { > + const size_type __sz = size_type (end() - begin()); > + if (__sz > max_size ()) > + __builtin_unreachable (); > + return __sz; > + } > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > size_type > @@ -1119,8 +1124,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > size_type > capacity() const _GLIBCXX_NOEXCEPT > - { return size_type(const_iterator(this->_M_impl._M_end_addr(), 0) > - - begin()); } > + { > + const size_type __sz = size_type(const_iterator(this->_M_impl._M_end_addr(), 0) - begin()); > + if (__sz > max_size ()) > + __builtin_unreachable (); > + return __sz; > + } > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > bool > @@ -1221,7 +1230,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr()) > *this->_M_impl._M_finish++ = __x; > else > - _M_insert_aux(end(), __x); > + _M_append_aux(__x); > } > > _GLIBCXX20_CONSTEXPR > @@ -1484,8 +1493,19 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > iterator __result) > { > _Bit_type* __q = std::copy(__first._M_p, __last._M_p, __result._M_p); > - return std::copy(const_iterator(__last._M_p, 0), __last, > - iterator(__q, 0)); > + if (!__last._M_offset) > + return iterator(__q, 0); > + _Bit_type __mask = ~0ul << __last._M_offset; > + *__q = (*__q & __mask) | (*__last._M_p & ~__mask); > + return iterator(__q, __last._M_offset); > + } Newline between functions please. This has the same precondition as _M_copy_aligned, right? It should repeat the comment above _M_copy_aligned. It would also be nice to have a comment saying what it does, and I agree a better name would help. Maybe "Copies [first,last2) to result, where last2 is the first word boundary not less than last" or something like that? How about _M_copy_aligned_ceil ? Since it can be considered to do a "ceil" operation on the _M_last iterator, copying past its offset when non-zero? > + _GLIBCXX20_CONSTEXPR > + iterator > + _M_copy_aligned_trail(const_iterator __first, const_iterator __last, > + iterator __result) > + { > + _Bit_type* __q = std::copy(__first._M_p, __last._M_p + (__last._M_offset != 0), __result._M_p); > + return iterator(__q - (__last._M_offset != 0), __last._M_offset); > } > > _GLIBCXX20_CONSTEXPR > @@ -1669,6 +1689,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > void > _M_insert_aux(iterator __position, bool __x); > > + _GLIBCXX20_CONSTEXPR > + void > + _M_realloc_insert_aux(iterator __position, bool __x); > + > + _GLIBCXX20_CONSTEXPR > + void > + _M_append_aux(bool __x); > + > + _GLIBCXX20_CONSTEXPR > + void > + _M_realloc_append_aux(bool __x); > + > _GLIBCXX20_CONSTEXPR > size_type > _M_check_len(size_type __n, const char* __s) const > diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc > index 542a66173a3..f9cc78d6dcf 100644 > --- a/libstdc++-v3/include/bits/vector.tcc > +++ b/libstdc++-v3/include/bits/vector.tcc > @@ -1182,6 +1182,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > } > } > } > + template<typename _Alloc> > + _GLIBCXX20_CONSTEXPR > + void > + vector<bool, _Alloc>:: > + _M_realloc_insert_aux(iterator __position, bool __x) > + { > + const size_type __len = > + _M_check_len(size_type(1), "vector<bool>::_M_realloc_insert_aux"); > + _Bit_pointer __q = this->_M_allocate(__len); > + iterator __start(std::__addressof(*__q), 0); > + iterator __i = _M_copy_aligned(begin(), __position, __start); > + *__i++ = __x; > + iterator __finish = std::copy(__position, end(), __i); > + this->_M_deallocate(); > + this->_M_impl._M_end_of_storage = __q + _S_nword(__len); > + this->_M_impl._M_start = __start; > + this->_M_impl._M_finish = __finish; > + } > > template<typename _Alloc> > _GLIBCXX20_CONSTEXPR > @@ -1197,19 +1215,40 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > ++this->_M_impl._M_finish; > } > else > + _M_realloc_insert_aux (__position, __x); > + } > + > + template<typename _Alloc> > + _GLIBCXX20_CONSTEXPR > + void > + vector<bool, _Alloc>:: > + _M_realloc_append_aux(bool __x) > + { > + const size_type __len = > + _M_check_len(size_type(1), "vector<bool>::_M_realloc_append_aux"); > + _Bit_pointer __q = this->_M_allocate(__len); > + iterator __start(std::__addressof(*__q), 0); > + iterator __i = _M_copy_aligned_trail(begin(), end(), __start); > + *__i++ = __x; > + this->_M_deallocate(); > + this->_M_impl._M_end_of_storage = __q + _S_nword(__len); > + this->_M_impl._M_start = __start; > + this->_M_impl._M_finish = __i; > + } > + > + template<typename _Alloc> > + _GLIBCXX20_CONSTEXPR > + void > + vector<bool, _Alloc>:: > + _M_append_aux(bool __x) > + { > + if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr()) > { > - const size_type __len = > - _M_check_len(size_type(1), "vector<bool>::_M_insert_aux"); > - _Bit_pointer __q = this->_M_allocate(__len); > - iterator __start(std::__addressof(*__q), 0); > - iterator __i = _M_copy_aligned(begin(), __position, __start); > - *__i++ = __x; > - iterator __finish = std::copy(__position, end(), __i); > - this->_M_deallocate(); > - this->_M_impl._M_end_of_storage = __q + _S_nword(__len); > - this->_M_impl._M_start = __start; > - this->_M_impl._M_finish = __finish; > + *end() = __x; > + ++this->_M_impl._M_finish; > } > + else > + _M_realloc_append_aux (__x); > } > > template<typename _Alloc> >
On Fri, 10 Jan 2025 at 14:51, Jonathan Wakely <jwakely@redhat.com> wrote: > > On Sun, 8 Dec 2024 at 15:36, Jan Hubicka <hubicka@ucw.cz> wrote: > > > > Hi, > > std::vector has independent implementation for bool which has its won > > size/capacity functions. I updated them to add __builtin_unreachable to > > announce that size is never more than max_size. However while testing the code > > I noticed that even construction of unused copy is not optimized out. Main > > problem is that the vector copying loops copies the tail of vector using loop > > that copies bit by bit. We eventually pattern match it to bit operations (that > > surprises me) but we need to unroll it and run through loop optimization that > > happens late. > > > > This patch also updates copy_aglined to use bit operations. However for = operation > > we can do better since it is not necessary to preserve original bits (those are > > undefined anyway). So I added copy_aglined_trail (better name would be welcome) > > that does not care about trailing bits of the copied block. > > > > As a result the following functions are optimized to empty functions: > > #include <vector> > > bool > > empty(std::vector<bool> src) > > { > > std::vector <bool> data=src; > > return false; > > } > > bool > > empty2(std::vector<bool> src) > > { > > std::vector <bool> data; > > data.reserve(1); > > return data.size (); > > } > > bool > > empty3() > > { > > std::vector <bool> data; > > data.push_back (true); > > return data[0]; > > } > > > > Finally I mirrored changes to push_back from normal vectors to bit vectors. > > This involve separating append from insert and breaking out the resizing (and > > cold) path to separate function. > > > > Here are two little benchmarks on push_back: > > > > #include <vector> > > > > __attribute__ ((noipa)) > > std::vector<bool> > > test() > > { > > std::vector <bool> t; > > t.push_back (1); > > t.push_back (2); > > return t; > > } > > int > > main() > > { > > for (int i = 0; i < 100000000; i++) > > { > > test(); > > } > > return 0; > > } > > > > runtime(s) .text size of a.out > > gcc -O2 1.04 1606 > > gcc -O2 + patch 0.98 1315 > > gcc -O3 0.98 1249 > > gcc -O3 + patch 0.96 1138 > > gcc -O3 + patch --param max-inline-insns-auto=5000 0.96 1138 > > clang -O2 1.56 1823 > > clang -O3 1.56 1839 > > clang -O2 + libc++ 2.31 4272 > > clang -O3 + libc++ 2.76 4262 > > > > push_back is still too large to inline fully at -O3. If parameters are bumped up > > for small vectors this makes it possible to propagate bit positions. > > This variant of benchmark > > > > #include <vector> > > > > __attribute__ ((noipa)) > > std::vector<bool> > > test() > > { > > std::vector <bool> t; > > t.push_back (1); > > t.push_back (2); > > return t; > > } > > int > > main() > > { > > for (int i = 0; i < 100000000; i++) > > { > > test(); > > } > > return 0; > > } > > > > > > runtime(s) .text size of a.out > > gcc -O2 1.48 1574 > > gcc -O2 + patch 1.30 1177 > > gcc -O3 1.46 1515 > > gcc -O3 + patch 1.27 1069 > > gcc -O3 + patch --param max-inline-insns-auto=5000 1.10 388 > > clang -O2 1.48 1823 > > clang -O3 1.46 1711 > > clang -O2 + libc++ 1.20 1001 > > clang -O3 + libc++ 1.17 1001 > > > > Note that clang does not suport noipa attribute, so it has some advantage here. > > Bootstrapped/regtested x86_64-linux, OK? > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/stl_bvector.h (vector<bool, _Alloc>::operator=): Use > > _M_copy_aligned_trail. > > (vector<bool, _Alloc>::copy_aglined_trail): New function. > > Spelling > > > (vector<bool, _Alloc>::copy_aglined): Implement copying of tail manually. > > Spelling > > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::capacity): Add check > > that size is at most max_size. > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::push_back): Use > > _M_append_aux. > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_append_aux): Declare. > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_append_aux): Declare. > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_insert_aux): Declare. > > * include/bits/vector.tcc > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_append_aux): Implement. > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_append_aux): Implement. > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_realloc_insert_aux): Break out from ... > > (vector<bool, _Alloc>::size,vector<bool, _Alloc>::_M_insert_aux): ... here. > > > > gcc/testsuite/ChangeLog: > > > > * g++.dg/tree-ssa/bvector-1.C: New test. > > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C > > new file mode 100644 > > index 00000000000..4197ff8c4b8 > > --- /dev/null > > +++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C > > @@ -0,0 +1,28 @@ > > +// { dg-do compile } > > +// { dg-options "-O2 -fdump-tree-optimized" } > > +// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } } > > +#include <vector> > > +bool > > +empty(std::vector<bool> src) > > +{ > > + std::vector <bool> data=src; > > + return false; > > +} > > +bool > > +empty2(std::vector<bool> src) > > +{ > > + std::vector <bool> data; > > + data.reserve(1); > > + return data.size (); > > +} > > +bool > > +empty3() > > +{ > > + std::vector <bool> data; > > + data.push_back (true); > > + return data[0]; > > +} > > +// All references to src should be optimized out, so there should be no name for it > > +// { dg-final { scan-tree-dump-not "src_" optimized } } > > +// { dg-final { scan-tree-dump-not "data_" optimized } } > > +// { dg-final { scan-tree-dump-not "new" optimized } } > > diff --git a/libstdc++-v3/include/bits/stl_bvector.h b/libstdc++-v3/include/bits/stl_bvector.h > > index 341eee33b21..ce1c5121b38 100644 > > --- a/libstdc++-v3/include/bits/stl_bvector.h > > +++ b/libstdc++-v3/include/bits/stl_bvector.h > > @@ -946,8 +946,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > this->_M_deallocate(); > > _M_initialize(__x.size()); > > } > > - this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(), > > - begin()); > > + this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), __x.end(), > > + begin()); > > return *this; > > } > > > > @@ -971,8 +971,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > this->_M_deallocate(); > > _M_initialize(__x.size()); > > } > > - this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(), > > - begin()); > > + this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), __x.end(), > > + begin()); > > __x.clear(); > > } > > return *this; > > @@ -1101,7 +1101,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > > size_type > > size() const _GLIBCXX_NOEXCEPT > > - { return size_type(end() - begin()); } > > + { > > + const size_type __sz = size_type (end() - begin()); > > + if (__sz > max_size ()) > > + __builtin_unreachable (); > > + return __sz; > > + } > > > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > > size_type > > @@ -1119,8 +1124,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > > size_type > > capacity() const _GLIBCXX_NOEXCEPT > > - { return size_type(const_iterator(this->_M_impl._M_end_addr(), 0) > > - - begin()); } > > + { > > + const size_type __sz = size_type(const_iterator(this->_M_impl._M_end_addr(), 0) - begin()); > > + if (__sz > max_size ()) > > + __builtin_unreachable (); > > + return __sz; > > + } > > > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > > bool > > @@ -1221,7 +1230,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr()) > > *this->_M_impl._M_finish++ = __x; > > else > > - _M_insert_aux(end(), __x); > > + _M_append_aux(__x); > > } > > > > _GLIBCXX20_CONSTEXPR > > @@ -1484,8 +1493,19 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > iterator __result) > > { > > _Bit_type* __q = std::copy(__first._M_p, __last._M_p, __result._M_p); > > - return std::copy(const_iterator(__last._M_p, 0), __last, > > - iterator(__q, 0)); > > + if (!__last._M_offset) > > + return iterator(__q, 0); > > + _Bit_type __mask = ~0ul << __last._M_offset; > > + *__q = (*__q & __mask) | (*__last._M_p & ~__mask); > > + return iterator(__q, __last._M_offset); > > + } > > Newline between functions please. > > This has the same precondition as _M_copy_aligned, right? It should > repeat the comment above _M_copy_aligned. It would also be nice to > have a comment saying what it does, and I agree a better name would > help. > > Maybe "Copies [first,last2) to result, where last2 is the first word > boundary not less than last" or something like that? And maybe mention that it copies "unused" capacity in [last,last2) which are values we don't actually care about, but are safe to copy, and doing so is faster than shifting and masking. > > How about _M_copy_aligned_ceil ? Since it can be considered to do a > "ceil" operation on the _M_last iterator, copying past its offset when > non-zero? > > > > + _GLIBCXX20_CONSTEXPR > > + iterator > > + _M_copy_aligned_trail(const_iterator __first, const_iterator __last, > > + iterator __result) > > + { > > + _Bit_type* __q = std::copy(__first._M_p, __last._M_p + (__last._M_offset != 0), __result._M_p); > > + return iterator(__q - (__last._M_offset != 0), __last._M_offset); > > } > > > > _GLIBCXX20_CONSTEXPR > > @@ -1669,6 +1689,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > void > > _M_insert_aux(iterator __position, bool __x); > > > > + _GLIBCXX20_CONSTEXPR > > + void > > + _M_realloc_insert_aux(iterator __position, bool __x); > > + > > + _GLIBCXX20_CONSTEXPR > > + void > > + _M_append_aux(bool __x); > > + > > + _GLIBCXX20_CONSTEXPR > > + void > > + _M_realloc_append_aux(bool __x); > > + > > _GLIBCXX20_CONSTEXPR > > size_type > > _M_check_len(size_type __n, const char* __s) const > > diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc > > index 542a66173a3..f9cc78d6dcf 100644 > > --- a/libstdc++-v3/include/bits/vector.tcc > > +++ b/libstdc++-v3/include/bits/vector.tcc > > @@ -1182,6 +1182,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > } > > } > > } > > + template<typename _Alloc> > > + _GLIBCXX20_CONSTEXPR > > + void > > + vector<bool, _Alloc>:: > > + _M_realloc_insert_aux(iterator __position, bool __x) > > + { > > + const size_type __len = > > + _M_check_len(size_type(1), "vector<bool>::_M_realloc_insert_aux"); > > + _Bit_pointer __q = this->_M_allocate(__len); > > + iterator __start(std::__addressof(*__q), 0); > > + iterator __i = _M_copy_aligned(begin(), __position, __start); > > + *__i++ = __x; > > + iterator __finish = std::copy(__position, end(), __i); > > + this->_M_deallocate(); > > + this->_M_impl._M_end_of_storage = __q + _S_nword(__len); > > + this->_M_impl._M_start = __start; > > + this->_M_impl._M_finish = __finish; > > + } > > > > template<typename _Alloc> > > _GLIBCXX20_CONSTEXPR > > @@ -1197,19 +1215,40 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > ++this->_M_impl._M_finish; > > } > > else > > + _M_realloc_insert_aux (__position, __x); > > + } > > + > > + template<typename _Alloc> > > + _GLIBCXX20_CONSTEXPR > > + void > > + vector<bool, _Alloc>:: > > + _M_realloc_append_aux(bool __x) > > + { > > + const size_type __len = > > + _M_check_len(size_type(1), "vector<bool>::_M_realloc_append_aux"); > > + _Bit_pointer __q = this->_M_allocate(__len); > > + iterator __start(std::__addressof(*__q), 0); > > + iterator __i = _M_copy_aligned_trail(begin(), end(), __start); > > + *__i++ = __x; > > + this->_M_deallocate(); > > + this->_M_impl._M_end_of_storage = __q + _S_nword(__len); > > + this->_M_impl._M_start = __start; > > + this->_M_impl._M_finish = __i; > > + } > > + > > + template<typename _Alloc> > > + _GLIBCXX20_CONSTEXPR > > + void > > + vector<bool, _Alloc>:: > > + _M_append_aux(bool __x) > > + { > > + if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr()) > > { > > - const size_type __len = > > - _M_check_len(size_type(1), "vector<bool>::_M_insert_aux"); > > - _Bit_pointer __q = this->_M_allocate(__len); > > - iterator __start(std::__addressof(*__q), 0); > > - iterator __i = _M_copy_aligned(begin(), __position, __start); > > - *__i++ = __x; > > - iterator __finish = std::copy(__position, end(), __i); > > - this->_M_deallocate(); > > - this->_M_impl._M_end_of_storage = __q + _S_nword(__len); > > - this->_M_impl._M_start = __start; > > - this->_M_impl._M_finish = __finish; > > + *end() = __x; > > + ++this->_M_impl._M_finish; > > } > > + else > > + _M_realloc_append_aux (__x); > > } > > > > template<typename _Alloc> > >
diff --git a/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C new file mode 100644 index 00000000000..4197ff8c4b8 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/bvector-1.C @@ -0,0 +1,28 @@ +// { dg-do compile } +// { dg-options "-O2 -fdump-tree-optimized" } +// { dg-skip-if "requires hosted libstdc++ for vector" { ! hostedlib } } +#include <vector> +bool +empty(std::vector<bool> src) +{ + std::vector <bool> data=src; + return false; +} +bool +empty2(std::vector<bool> src) +{ + std::vector <bool> data; + data.reserve(1); + return data.size (); +} +bool +empty3() +{ + std::vector <bool> data; + data.push_back (true); + return data[0]; +} +// All references to src should be optimized out, so there should be no name for it +// { dg-final { scan-tree-dump-not "src_" optimized } } +// { dg-final { scan-tree-dump-not "data_" optimized } } +// { dg-final { scan-tree-dump-not "new" optimized } } diff --git a/libstdc++-v3/include/bits/stl_bvector.h b/libstdc++-v3/include/bits/stl_bvector.h index 341eee33b21..ce1c5121b38 100644 --- a/libstdc++-v3/include/bits/stl_bvector.h +++ b/libstdc++-v3/include/bits/stl_bvector.h @@ -946,8 +946,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER this->_M_deallocate(); _M_initialize(__x.size()); } - this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(), - begin()); + this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), __x.end(), + begin()); return *this; } @@ -971,8 +971,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER this->_M_deallocate(); _M_initialize(__x.size()); } - this->_M_impl._M_finish = _M_copy_aligned(__x.begin(), __x.end(), - begin()); + this->_M_impl._M_finish = _M_copy_aligned_trail(__x.begin(), __x.end(), + begin()); __x.clear(); } return *this; @@ -1101,7 +1101,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR size_type size() const _GLIBCXX_NOEXCEPT - { return size_type(end() - begin()); } + { + const size_type __sz = size_type (end() - begin()); + if (__sz > max_size ()) + __builtin_unreachable (); + return __sz; + } _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR size_type @@ -1119,8 +1124,12 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR size_type capacity() const _GLIBCXX_NOEXCEPT - { return size_type(const_iterator(this->_M_impl._M_end_addr(), 0) - - begin()); } + { + const size_type __sz = size_type(const_iterator(this->_M_impl._M_end_addr(), 0) - begin()); + if (__sz > max_size ()) + __builtin_unreachable (); + return __sz; + } _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR bool @@ -1221,7 +1230,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr()) *this->_M_impl._M_finish++ = __x; else - _M_insert_aux(end(), __x); + _M_append_aux(__x); } _GLIBCXX20_CONSTEXPR @@ -1484,8 +1493,19 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER iterator __result) { _Bit_type* __q = std::copy(__first._M_p, __last._M_p, __result._M_p); - return std::copy(const_iterator(__last._M_p, 0), __last, - iterator(__q, 0)); + if (!__last._M_offset) + return iterator(__q, 0); + _Bit_type __mask = ~0ul << __last._M_offset; + *__q = (*__q & __mask) | (*__last._M_p & ~__mask); + return iterator(__q, __last._M_offset); + } + _GLIBCXX20_CONSTEXPR + iterator + _M_copy_aligned_trail(const_iterator __first, const_iterator __last, + iterator __result) + { + _Bit_type* __q = std::copy(__first._M_p, __last._M_p + (__last._M_offset != 0), __result._M_p); + return iterator(__q - (__last._M_offset != 0), __last._M_offset); } _GLIBCXX20_CONSTEXPR @@ -1669,6 +1689,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER void _M_insert_aux(iterator __position, bool __x); + _GLIBCXX20_CONSTEXPR + void + _M_realloc_insert_aux(iterator __position, bool __x); + + _GLIBCXX20_CONSTEXPR + void + _M_append_aux(bool __x); + + _GLIBCXX20_CONSTEXPR + void + _M_realloc_append_aux(bool __x); + _GLIBCXX20_CONSTEXPR size_type _M_check_len(size_type __n, const char* __s) const diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc index 542a66173a3..f9cc78d6dcf 100644 --- a/libstdc++-v3/include/bits/vector.tcc +++ b/libstdc++-v3/include/bits/vector.tcc @@ -1182,6 +1182,24 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER } } } + template<typename _Alloc> + _GLIBCXX20_CONSTEXPR + void + vector<bool, _Alloc>:: + _M_realloc_insert_aux(iterator __position, bool __x) + { + const size_type __len = + _M_check_len(size_type(1), "vector<bool>::_M_realloc_insert_aux"); + _Bit_pointer __q = this->_M_allocate(__len); + iterator __start(std::__addressof(*__q), 0); + iterator __i = _M_copy_aligned(begin(), __position, __start); + *__i++ = __x; + iterator __finish = std::copy(__position, end(), __i); + this->_M_deallocate(); + this->_M_impl._M_end_of_storage = __q + _S_nword(__len); + this->_M_impl._M_start = __start; + this->_M_impl._M_finish = __finish; + } template<typename _Alloc> _GLIBCXX20_CONSTEXPR @@ -1197,19 +1215,40 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER ++this->_M_impl._M_finish; } else + _M_realloc_insert_aux (__position, __x); + } + + template<typename _Alloc> + _GLIBCXX20_CONSTEXPR + void + vector<bool, _Alloc>:: + _M_realloc_append_aux(bool __x) + { + const size_type __len = + _M_check_len(size_type(1), "vector<bool>::_M_realloc_append_aux"); + _Bit_pointer __q = this->_M_allocate(__len); + iterator __start(std::__addressof(*__q), 0); + iterator __i = _M_copy_aligned_trail(begin(), end(), __start); + *__i++ = __x; + this->_M_deallocate(); + this->_M_impl._M_end_of_storage = __q + _S_nword(__len); + this->_M_impl._M_start = __start; + this->_M_impl._M_finish = __i; + } + + template<typename _Alloc> + _GLIBCXX20_CONSTEXPR + void + vector<bool, _Alloc>:: + _M_append_aux(bool __x) + { + if (this->_M_impl._M_finish._M_p != this->_M_impl._M_end_addr()) { - const size_type __len = - _M_check_len(size_type(1), "vector<bool>::_M_insert_aux"); - _Bit_pointer __q = this->_M_allocate(__len); - iterator __start(std::__addressof(*__q), 0); - iterator __i = _M_copy_aligned(begin(), __position, __start); - *__i++ = __x; - iterator __finish = std::copy(__position, end(), __i); - this->_M_deallocate(); - this->_M_impl._M_end_of_storage = __q + _S_nword(__len); - this->_M_impl._M_start = __start; - this->_M_impl._M_finish = __finish; + *end() = __x; + ++this->_M_impl._M_finish; } + else + _M_realloc_append_aux (__x); } template<typename _Alloc>