Define std::codecvt<char16_t,...> and std::codecvt<char32_t,...>

Message ID	20150116150226.GG3360@redhat.com
State	New
Headers	show Return-Path: <gcc-patches-return-389563-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:date :from:to:subject:message-id:mime-version:content-type :content-transfer-encoding; q=dns; s=default; b=A28wBuazyTTPdLEL KEWpgrYrNdU8huoDUgUUAp/KFs63Qi2siu4CTYdnHJN1wad5SFNYJhNTjwWEPhIv 1p5pfbf+Cc7Nd0DRmIvWILvuZMYv7Ao2XtJeEUSHW6pHuTfW4vJaLN1MZBXocbvg IpASqhe38VZoEVZF2cBSSJIqt68= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org Date: Fri, 16 Jan 2015 15:02:26 +0000 From: Jonathan Wakely <jwakely@redhat.com> To: libstdc++@gcc.gnu.org, gcc-patches@gcc.gnu.org Subject: [patch] Define std::codecvt<char16_t, ...> and std::codecvt<char32_t, ...> Message-ID: <20150116150226.GG3360@redhat.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="uuKVzAmB+c+zQlhu" Content-Disposition: inline Content-Transfer-Encoding: 8bit User-Agent: Mutt/1.5.23 (2014-03-12)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index db357d6..74e8eaf 100644 --- a/libstdc++-v3/acinclude.m4 +++ b/libstdc++-v3/acinclude.m4 @@ -1777,7 +1777,7 @@ AC_DEFUN([GLIBCXX_CHECK_C99_TR1], [ <tr1/cinttypes> in namespace std::tr1.]) fi - # Check for the existence of whcar_t <inttypes.h> functions (NB: doesn't + # Check for the existence of wchar_t <inttypes.h> functions (NB: doesn't # make sense if the glibcxx_cv_c99_stdint_tr1 check fails, per C99, 7.8/1). ac_c99_inttypes_wchar_t_tr1=no; if test x"$glibcxx_cv_c99_stdint_tr1" = x"yes"; then diff --git a/libstdc++-v3/config/abi/pre/gnu.ver b/libstdc++-v3/config/abi/pre/gnu.ver index 700da18..83f4e8c 100644 --- a/libstdc++-v3/config/abi/pre/gnu.ver +++ b/libstdc++-v3/config/abi/pre/gnu.ver @@ -1759,6 +1759,11 @@ GLIBCXX_3.4.21 { _ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE3getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmPK[cw]SC_; _ZNKSt8time_getI[cw]St19istreambuf_iteratorI[cw]St11char_traitsI[cw]EEE6do_getES3_S3_RSt8ios_baseRSt12_Ios_IostateP2tmcc; + # codecvt<char16_t, char, mbstate_t>, codecvt<char32_t, char, mbstate_t> + _ZNKSt7codecvtID[is]c11__mbstate_t*; + _ZNSt7codecvtID[is]c11__mbstate_t*; + _ZT[ISV]St7codecvtID[is]c11__mbstate_tE; + } GLIBCXX_3.4.20; diff --git a/libstdc++-v3/include/bits/codecvt.h b/libstdc++-v3/include/bits/codecvt.h index 1eee1cc..a6e59b5 100644 --- a/libstdc++-v3/include/bits/codecvt.h +++ b/libstdc++-v3/include/bits/codecvt.h @@ -393,7 +393,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION }; #ifdef _GLIBCXX_USE_WCHAR_T - /// class codecvt<wchar_t, char, mbstate_t> specialization. + /** @brief Class codecvt<wchar_t, char, mbstate_t> specialization. 
+ * + * Converts between narrow and wide characters in the native character set + */ template<> class codecvt<wchar_t, char, mbstate_t> : public __codecvt_abstract_base<wchar_t, char, mbstate_t> @@ -455,6 +458,125 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION }; #endif //_GLIBCXX_USE_WCHAR_T +#if __cplusplus >= 201103L +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + /** @brief Class codecvt<char16_t, char, mbstate_t> specialization. + * + * Converts between UTF-16 and UTF-8. + */ + template<> + class codecvt<char16_t, char, mbstate_t> + : public __codecvt_abstract_base<char16_t, char, mbstate_t> + { + public: + // Types: + typedef char16_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + public: + static locale::id id; + + explicit + codecvt(size_t __refs = 0) + : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { } + + protected: + virtual + ~codecvt(); + + virtual result + do_out(state_type& __state, const intern_type* __from, + const intern_type* __from_end, const intern_type*& __from_next, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_unshift(state_type& __state, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_in(state_type& __state, + const extern_type* __from, const extern_type* __from_end, + const extern_type*& __from_next, + intern_type* __to, intern_type* __to_end, + intern_type*& __to_next) const; + + virtual + int do_encoding() const throw(); + + virtual + bool do_always_noconv() const throw(); + + virtual + int do_length(state_type&, const extern_type* __from, + const extern_type* __end, size_t __max) const; + + virtual int + do_max_length() const throw(); + }; + + /** @brief Class codecvt<char32_t, char, mbstate_t> specialization. + * + * Converts between UTF-32 and UTF-8. 
+ */ + template<> + class codecvt<char32_t, char, mbstate_t> + : public __codecvt_abstract_base<char32_t, char, mbstate_t> + { + public: + // Types: + typedef char32_t intern_type; + typedef char extern_type; + typedef mbstate_t state_type; + + public: + static locale::id id; + + explicit + codecvt(size_t __refs = 0) + : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { } + + protected: + virtual + ~codecvt(); + + virtual result + do_out(state_type& __state, const intern_type* __from, + const intern_type* __from_end, const intern_type*& __from_next, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_unshift(state_type& __state, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const; + + virtual result + do_in(state_type& __state, + const extern_type* __from, const extern_type* __from_end, + const extern_type*& __from_next, + intern_type* __to, intern_type* __to_end, + intern_type*& __to_next) const; + + virtual + int do_encoding() const throw(); + + virtual + bool do_always_noconv() const throw(); + + virtual + int do_length(state_type&, const extern_type* __from, + const extern_type* __end, size_t __max) const; + + virtual int + do_max_length() const throw(); + }; + +#endif // _GLIBCXX_USE_C99_STDINT_TR1 +#endif // C++11 + /// class codecvt_byname [22.2.1.6]. 
template<typename _InternT, typename _ExternT, typename _StateT> class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> diff --git a/libstdc++-v3/include/bits/locale_facets.h b/libstdc++-v3/include/bits/locale_facets.h index a5fc45e..77932a5 100644 --- a/libstdc++-v3/include/bits/locale_facets.h +++ b/libstdc++-v3/include/bits/locale_facets.h @@ -59,6 +59,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION # define _GLIBCXX_NUM_FACETS 14 # define _GLIBCXX_NUM_CXX11_FACETS 8 #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 +# define _GLIBCXX_NUM_UNICODE_FACETS 2 +#else +# define _GLIBCXX_NUM_UNICODE_FACETS 0 +#endif // Convert string to numeric value of type _Tp and store results. // NB: This is specialized for all required types, there is no diff --git a/libstdc++-v3/src/c++11/Makefile.am b/libstdc++-v3/src/c++11/Makefile.am index 4cba983..b57e552 100644 --- a/libstdc++-v3/src/c++11/Makefile.am +++ b/libstdc++-v3/src/c++11/Makefile.am @@ -53,6 +53,7 @@ endif sources = \ chrono.cc \ + codecvt.cc \ condition_variable.cc \ cow-stdexcept.cc \ ctype.cc \ diff --git a/libstdc++-v3/src/c++11/codecvt.cc b/libstdc++-v3/src/c++11/codecvt.cc new file mode 100644 index 0000000..fdd4972 --- /dev/null +++ b/libstdc++-v3/src/c++11/codecvt.cc @@ -0,0 +1,461 @@ +// Locale support (codecvt) -*- C++ -*- + +// Copyright (C) 2015 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+ +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +#include <bits/locale_classes.h> +#include <bits/codecvt.h> +#include <bits/stl_algobase.h> // std::max +#include <cstring> // std::memcpy, std::memcmp + +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 +namespace std _GLIBCXX_VISIBILITY(default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + +namespace +{ + // Largest code point that fits in a single UTF-16 code unit. + const char32_t max_single_utf16_unit = 0xFFFF; + const char32_t max_code_point = 0x10FFFF; + + template<typename Elem> + struct range + { + Elem* next; + Elem* end; + + Elem operator*() const { return *next; } + + range& operator++() { ++next; return *this; } + + size_t size() const { return end - next; } + }; + + char32_t + read_utf8_code_point(range<const char>& from, unsigned long maxcode) + { + size_t avail = from.size(); + if (avail == 0) + return -1; + unsigned char c1 = from.next[0]; + // https://en.wikipedia.org/wiki/UTF-8#Sample_code + if (c1 < 0x80) + { + ++from.next; + return c1; + } + else if (c1 < 0xC2) // continuation or overlong 2-byte sequence + return -1; + else if (c1 < 0xE0) // 2-byte sequence + { + if (avail < 2) + return -1; + unsigned char c2 = from.next[1]; + if ((c2 & 0xC0) != 0x80) + return -1; + char32_t c = (c1 << 6) + c2 - 0x3080; + if (c > maxcode) + return -1; + from.next += 2; + return c; + } + else if (c1 < 0xF0) // 3-byte sequence + { + if (avail < 3) + return -1; + unsigned char c2 = from.next[1]; + if ((c2 & 0xC0) != 0x80) + return -1; + if (c1 == 0xE0 && c2 < 0xA0) // overlong + return -1; + unsigned char c3 = from.next[2]; + if ((c3 & 
0xC0) != 0x80) + return -1; + char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080; + if (c > maxcode) + return -1; + from.next += 3; + return c; + } + else if (c1 < 0xF5) // 4-byte sequence + { + if (avail < 4) + return -1; + unsigned char c2 = from.next[1]; + if ((c2 & 0xC0) != 0x80) + return -1; + if (c1 == 0xF0 && c2 < 0x90) // overlong + return -1; + if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF + return -1; + unsigned char c3 = from.next[2]; + if ((c3 & 0xC0) != 0x80) + return -1; + unsigned char c4 = from.next[3]; + if ((c4 & 0xC0) != 0x80) + return -1; + char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080; + if (c > maxcode) + return -1; + from.next += 4; + return c; + } + else // > U+10FFFF + return -1; + } + + bool + write_utf8_code_point(range<char>& to, char32_t code_point) + { + if (code_point < 0x80) + { + if (to.size() < 1) + return false; + *to.next++ = code_point; + } + else if (code_point <= 0x7FF) + { + if (to.size() < 2) + return false; + *to.next++ = (code_point >> 6) + 0xC0; + *to.next++ = (code_point & 0x3F) + 0x80; + } + else if (code_point <= 0xFFFF) + { + if (to.size() < 3) + return false; + *to.next++ = (code_point >> 12) + 0xE0; + *to.next++ = ((code_point >> 6) & 0x3F) + 0x80; + *to.next++ = (code_point & 0x3F) + 0x80; + } + else if (code_point <= 0x10FFFF) + { + if (to.size() < 4) + return false; + *to.next++ = (code_point >> 18) + 0xF0; + *to.next++ = ((code_point >> 12) & 0x3F) + 0x80; + *to.next++ = ((code_point >> 6) & 0x3F) + 0x80; + *to.next++ = (code_point & 0x3F) + 0x80; + } + else + return false; + return true; + } + + bool + write_utf16_code_point(range<char16_t>& to, char32_t codepoint) + { + if (codepoint < max_single_utf16_unit) + { + if (to.size() > 0) + { + *to.next = codepoint; + ++to.next; + return true; + } + } + else if (to.size() > 1) + { + // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4 + const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10); + const char32_t SURROGATE_OFFSET = 0x10000 - 
(0xD800 << 10) - 0xDC00; + char16_t lead = LEAD_OFFSET + (codepoint >> 10); + char16_t trail = 0xDC00 + (codepoint & 0x3FF); + char32_t utf16bytes = (lead << 10) + trail + SURROGATE_OFFSET; + + to.next[0] = utf16bytes >> 16; + to.next[1] = utf16bytes & 0xFFFF; + to.next += 2; + return true; + } + return false; + } + + // utf8 -> ucs4 + codecvt_base::result + ucs4_in(range<const char>& from, range<char32_t>& to, + unsigned long maxcode = max_code_point) + { + while (from.size() && to.size()) + { + const char32_t codepoint = read_utf8_code_point(from, maxcode); + if (codepoint == char32_t(-1) || codepoint > maxcode) + return codecvt_base::error; + *to.next++ = codepoint; + } + return from.size() ? codecvt_base::partial : codecvt_base::ok; + } + + // ucs4 -> utf8 + codecvt_base::result + ucs4_out(range<const char32_t>& from, range<char>& to, + unsigned long maxcode = max_code_point) + { + while (from.size()) + { + const char32_t c = from.next[0]; + if (c > maxcode) + return codecvt_base::error; + if (!write_utf8_code_point(to, c)) + return codecvt_base::partial; + ++from.next; + } + return codecvt_base::ok; + } + + // utf8 -> utf16 + codecvt_base::result + utf16_in(range<const char>& from, range<char16_t>& to, + unsigned long maxcode = max_code_point) + { + while (from.size() && to.size()) + { + const char* first = from.next; + if ((unsigned char)*first >= 0xF0 && to.size() < 2) + return codecvt_base::partial; + const char32_t codepoint = read_utf8_code_point(from, maxcode); + if (codepoint == char32_t(-1) || codepoint > maxcode) + return codecvt_base::error; + if (!write_utf16_code_point(to, codepoint)) + { + from.next = first; + return codecvt_base::partial; + } + } + return codecvt_base::ok; + } + + // utf16 -> utf8 + codecvt_base::result + utf16_out(range<const char16_t>& from, range<char>& to, + unsigned long maxcode = max_code_point) + { + while (from.size()) + { + char32_t c = from.next[0]; + int inc = 1; + if (c >= 0xD800 && c < 0xDBFF) // start of surrogate 
pair + { + if (from.size() < 2) + return codecvt_base::ok; // stop converting at this point + + const char32_t c2 = from.next[1]; + if (c2 >= 0xDC00 && c2 <= 0xDFFF) + { + inc = 2; + c = (c << 10) + c2 - 0x35FDC00; + } + else + return codecvt_base::error; + } + if (c > maxcode) + return codecvt_base::error; + if (!write_utf8_code_point(to, c)) + return codecvt_base::partial; + from.next += inc; + } + return codecvt_base::ok; + } + + // return pos such that [begin,pos) is valid UTF-16 string no longer than max + int + utf16_len(const char* begin, const char* end, size_t max, + char32_t maxcode = max_code_point) + { + range<const char> from{ begin, end }; + size_t count = 0; + while (count+1 < max) + { + char32_t c = read_utf8_code_point(from, maxcode); + if (c == char32_t(-1)) + break; + else if (c > max_single_utf16_unit) + ++count; + ++count; + } + if (count+1 == max) // take one more character if it fits in a single unit + read_utf8_code_point(from, std::max(max_single_utf16_unit, maxcode)); + return from.next - begin; + } + + // return pos such that [begin,pos) is valid UCS-4 string no longer than max + int + ucs4_len(const char* begin, const char* end, size_t max, + char32_t maxcode = max_code_point) + { + range<const char> from{ begin, end }; + size_t count = 0; + while (count < max) + { + char32_t c = read_utf8_code_point(from, maxcode); + if (c == char32_t(-1)) + break; + ++count; + } + return from.next - begin; + } +} + +// Define members of codecvt<char16_t, char, mbstate_t> specialization. +// Converts from UTF-8 to UTF-16. 
+
+locale::id codecvt<char16_t, char, mbstate_t>::id;
+
+codecvt<char16_t, char, mbstate_t>::~codecvt() { }
+
+// UTF-16 -> UTF-8.  The state argument is unused.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_out(state_type&,
+       const intern_type* __from,
+       const intern_type* __from_end, const intern_type*& __from_next,
+       extern_type* __to, extern_type* __to_end,
+       extern_type*& __to_next) const
+{
+  range<const char16_t> src{ __from, __from_end };
+  range<char> dst{ __to, __to_end };
+  const codecvt_base::result status = utf16_out(src, dst);
+  // Report how far the conversion got in both sequences.
+  __to_next = dst.next;
+  __from_next = src.next;
+  return status;
+}
+
+// Nothing is ever pending, so there is no shift sequence to write.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_unshift(state_type&, extern_type* __to, extern_type*,
+           extern_type*& __to_next) const
+{
+  __to_next = __to;
+  return noconv; // we don't use mbstate_t for the unicode facets
+}
+
+// UTF-8 -> UTF-16.  The state argument is unused.
+codecvt_base::result
+codecvt<char16_t, char, mbstate_t>::
+do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
+      const extern_type*& __from_next,
+      intern_type* __to, intern_type* __to_end,
+      intern_type*& __to_next) const
+{
+  range<const char> src{ __from, __from_end };
+  range<char16_t> dst{ __to, __to_end };
+  const codecvt_base::result status = utf16_in(src, dst);
+  // Report how far the conversion got in both sequences.
+  __to_next = dst.next;
+  __from_next = src.next;
+  return status;
+}
+
+// Variable-width external encoding.
+int
+codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
+{
+  return 0;
+}
+
+bool
+codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
+{
+  return false;
+}
+
+int
+codecvt<char16_t, char, mbstate_t>::
+do_length(state_type&, const extern_type* __from,
+          const extern_type* __end, size_t __max) const
+{
+  const int consumed = utf16_len(__from, __end, __max);
+  return consumed;
+}
+
+int
+codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
+{
+  // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
+  // whereas 4 byte sequences require two 16-bit code units.
+  return 3;
+}
+
+// Define members of codecvt<char32_t, char, mbstate_t> specialization.
+// Converts from UTF-8 to UTF-32 (aka UCS-4).
+ +locale::id codecvt<char32_t, char, mbstate_t>::id; + +codecvt<char32_t, char, mbstate_t>::~codecvt() { } + +codecvt_base::result +codecvt<char32_t, char, mbstate_t>:: +do_out(state_type&, const intern_type* __from, const intern_type* __from_end, + const intern_type*& __from_next, + extern_type* __to, extern_type* __to_end, + extern_type*& __to_next) const +{ + range<const char32_t> from{ __from, __from_end }; + range<char> to{ __to, __to_end }; + auto res = ucs4_out(from, to); + __from_next = from.next; + __to_next = to.next; + return res; +} + +codecvt_base::result +codecvt<char32_t, char, mbstate_t>:: +do_unshift(state_type&, extern_type* __to, extern_type*, + extern_type*& __to_next) const +{ + __to_next = __to; + return noconv; +} + +codecvt_base::result +codecvt<char32_t, char, mbstate_t>:: +do_in(state_type&, const extern_type* __from, const extern_type* __from_end, + const extern_type*& __from_next, + intern_type* __to, intern_type* __to_end, + intern_type*& __to_next) const +{ + range<const char> from{ __from, __from_end }; + range<char32_t> to{ __to, __to_end }; + auto res = ucs4_in(from, to); + __from_next = from.next; + __to_next = to.next; + return res; +} + +int +codecvt<char32_t, char, mbstate_t>::do_encoding() const throw() +{ return 0; } + +bool +codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw() +{ return false; } + +int +codecvt<char32_t, char, mbstate_t>:: +do_length(state_type&, const extern_type* __from, + const extern_type* __end, size_t __max) const +{ + return ucs4_len(__from, __end, __max); +} + +int +codecvt<char32_t, char, mbstate_t>::do_max_length() const throw() +{ return 4; } + +inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>; +inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>; + +_GLIBCXX_END_NAMESPACE_VERSION +} +#endif // _GLIBCXX_USE_C99_STDINT_TR1 diff --git a/libstdc++-v3/src/c++98/Makefile.am b/libstdc++-v3/src/c++98/Makefile.am index 6dd7a72..e348dfb 100644 
--- a/libstdc++-v3/src/c++98/Makefile.am +++ b/libstdc++-v3/src/c++98/Makefile.am @@ -176,6 +176,16 @@ numeric_members_cow.o: numeric_members_cow.cc $(CXXCOMPILE) $(GLIBCXX_ABI_FLAGS) -fimplicit-templates -c $< endif +# XXX TODO move locale_init.cc and localename.cc to src/c++11 +locale_init.lo: locale_init.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +locale_init.o: locale_init.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +localename.lo: localename.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< +localename.o: localename.cc + $(LTCXXCOMPILE) -std=gnu++11 -c $< + # Use special rules for the deprecated source files so that they find # deprecated include files. GLIBCXX_INCLUDE_DIR=$(glibcxx_builddir)/include diff --git a/libstdc++-v3/src/c++98/locale_init.cc b/libstdc++-v3/src/c++98/locale_init.cc index c45eff3..0a95b9f 100644 --- a/libstdc++-v3/src/c++98/locale_init.cc +++ b/libstdc++-v3/src/c++98/locale_init.cc @@ -57,7 +57,7 @@ _GLIBCXX_LOC_ID(_ZNSt8messagesIwE2idE); namespace { - const int num_facets = _GLIBCXX_NUM_FACETS + const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS + (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0); __gnu_cxx::__mutex& @@ -201,6 +201,16 @@ namespace fake_messages_w messages_w; #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + typedef char fake_codecvt_c16[sizeof(codecvt<char16_t, char, mbstate_t>)] + __attribute__ ((aligned(__alignof__(codecvt<char16_t, char, mbstate_t>)))); + fake_codecvt_c16 codecvt_c16; + + typedef char fake_codecvt_c32[sizeof(codecvt<char32_t, char, mbstate_t>)] + __attribute__ ((aligned(__alignof__(codecvt<char32_t, char, mbstate_t>)))); + fake_codecvt_c32 codecvt_c32; +#endif + // Storage for "C" locale caches. 
typedef char fake_num_cache_c[sizeof(std::__numpunct_cache<char>)] __attribute__ ((aligned(__alignof__(std::__numpunct_cache<char>)))); @@ -319,6 +329,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION &std::ctype<wchar_t>::id, &codecvt<wchar_t, char, mbstate_t>::id, #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + &codecvt<char16_t, char, mbstate_t>::id, + &codecvt<char32_t, char, mbstate_t>::id, +#endif 0 }; @@ -522,6 +536,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_init_facet(new (&messages_w) std::messages<wchar_t>(1)); #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + _M_init_facet(new (&codecvt_c16) codecvt<char16_t, char, mbstate_t>(1)); + _M_init_facet(new (&codecvt_c32) codecvt<char32_t, char, mbstate_t>(1)); +#endif + #if _GLIBCXX_USE_DUAL_ABI facet* extra[] = { __npc, __mpcf, __mpct # ifdef _GLIBCXX_USE_WCHAR_T diff --git a/libstdc++-v3/src/c++98/localename.cc b/libstdc++-v3/src/c++98/localename.cc index c42a217..2884bee 100644 --- a/libstdc++-v3/src/c++98/localename.cc +++ b/libstdc++-v3/src/c++98/localename.cc @@ -171,7 +171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } -const int num_facets = _GLIBCXX_NUM_FACETS +const int num_facets = _GLIBCXX_NUM_FACETS + _GLIBCXX_NUM_UNICODE_FACETS + (_GLIBCXX_USE_DUAL_ABI ? _GLIBCXX_NUM_CXX11_FACETS : 0); // Construct named _Impl. 
@@ -267,7 +267,12 @@ const int num_facets = _GLIBCXX_NUM_FACETS _M_init_facet(new time_get<wchar_t>); _M_init_facet(new time_put<wchar_t>); _M_init_facet(new std::messages<wchar_t>(__cloc, __s)); -#endif +#endif + +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + _M_init_facet(new codecvt<char16_t, char, mbstate_t>); + _M_init_facet(new codecvt<char32_t, char, mbstate_t>); +#endif #if _GLIBCXX_USE_DUAL_ABI _M_init_extra(&__cloc, &__clocm, __s, __smon); diff --git a/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc b/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc new file mode 100644 index 0000000..987233a --- /dev/null +++ b/libstdc++-v3/testsuite/22_locale/codecvt/utf8.cc @@ -0,0 +1,76 @@ +// Copyright (C) 2015 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. 
+ +// { dg-require-cstdint "" } +// { dg-options "-std=gnu++11" } + +#include <locale> +#include <iterator> +#include <string> +#include <testsuite_hooks.h> + +const char expected[] = u8"£¥€"; +const std::size_t expected_len = std::char_traits<char>::length(expected); + +template<typename C> +void test(const C* from) +{ + auto len = std::char_traits<C>::length(from); + std::mbstate_t state{}; + char buf[16] = { }; + using test_type = std::codecvt<C, char, std::mbstate_t>; + const test_type& cvt = std::use_facet<test_type>(std::locale::classic()); + auto from_end = from + len; + auto from_next = from; + auto buf_end = std::end(buf); + auto buf_next = buf; + auto res = cvt.out(state, from, from_end, from_next, buf, buf_end, buf_next); + VERIFY( res == std::codecvt_base::ok ); + VERIFY( from_next == from_end ); + VERIFY( (buf_next - buf) == expected_len ); + VERIFY( 0 == std::char_traits<char>::compare(buf, expected, expected_len) ); + + C buf2[16]; + auto exp_end = expected + expected_len; + auto exp_next = expected; + auto buf2_end = std::end(buf2); + auto buf2_next = buf2; + res = cvt.in(state, expected, exp_end, exp_next, buf2, buf2_end, buf2_next); + VERIFY( res == std::codecvt_base::ok ); + VERIFY( exp_next == exp_end ); + VERIFY( (buf2_next - buf2) == len ); + VERIFY( 0 == std::char_traits<C>::compare(buf2, from, len) ); +} + +void +test01() +{ + test(u"£¥€"); +} + +void +test02() +{ + test(U"£¥€"); +} + +int +main() +{ + test01(); + test02(); +} diff --git a/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc b/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc index b6f8c8b..33b5a8a 100644 --- a/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc +++ b/libstdc++-v3/testsuite/22_locale/locale/cons/unicode.cc @@ -1,4 +1,5 @@ // { dg-require-iconv "ISO-8859-1" } +// { dg-options "-std=gnu++11" } // Copyright (C) 2006-2015 Free Software Foundation, Inc. 
// @@ -32,6 +33,11 @@ typedef std::codecvt<char, char, std::mbstate_t> c_codecvt; typedef std::codecvt<wchar_t, char, std::mbstate_t> w_codecvt; #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 +typedef std::codecvt<char16_t, char, std::mbstate_t> u16_codecvt; +typedef std::codecvt<char32_t, char, std::mbstate_t> u32_codecvt; +#endif + class gnu_facet: public std::locale::facet { public: @@ -61,6 +67,10 @@ void test01() #ifdef _GLIBCXX_USE_WCHAR_T VERIFY( has_facet<w_codecvt>(loc13) ); #endif +#ifdef _GLIBCXX_USE_C99_STDINT_TR1 + VERIFY( has_facet<u16_codecvt>(loc13) ); + VERIFY( has_facet<u32_codecvt>(loc13) ); +#endif VERIFY( has_facet<unicode_codecvt>(loc13) ); } catch(...)

Define std::codecvt<char16_t,...> and std::codecvt<char32_t,...>

Commit Message

Patch